aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorLucas De Marchi <lucas.demarchi@intel.com>2025-07-10 13:33:47 -0700
committerLucas De Marchi <lucas.demarchi@intel.com>2025-07-14 13:20:02 -0700
commite4cb5823ba3e2668ef5c164898e2aa2c0ad73742 (patch)
tree36b169e92e9418ed76651f2aa60eedb518d83641 /drivers/gpu
parentdrm/xe/lrc: Reduce scope of empty lrc data (diff)
downloadlinux-e4cb5823ba3e2668ef5c164898e2aa2c0ad73742.tar.gz
linux-e4cb5823ba3e2668ef5c164898e2aa2c0ad73742.zip
drm/xe: Count dwords before allocating
The bb allocation in emit_wa_job() is wrong in 2 ways: first it's allocating enough space for the 3DSTATE or hardcoding 4k depending on the engine. In the first case it doesn't account for the WAs and in the former it may not be sufficient. Secondly it's using the size instead of number of dwords, causing the buffer to be 4x bigger than needed: xe_bb_new() receives number of dwords as parameter and its declaration was also not following its implementation. Lastly, reword the debug message since it's not only about the LRC WAs anymore as it also include the 3DSTATE for render. While it's unlikely this is causing any real issue, let's calculate the needed space and allocate just enough. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-2-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/xe/xe_bb.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c38
2 files changed, 25 insertions, 15 deletions
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index fafacd73dcc3..b5cc65506696 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -14,7 +14,7 @@ struct xe_gt;
struct xe_exec_queue;
struct xe_sched_job;
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
struct xe_bb *bb);
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 9dad4f79328e..134d430cce73 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -189,16 +189,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
long timeout;
int count_rmw = 0;
int count = 0;
-
- if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
- /* Big enough to emit all of the context's 3DSTATE */
- bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
- else
- /* Just pick a large BB size */
- bb = xe_bb_new(gt, SZ_4K, false);
-
- if (IS_ERR(bb))
- return PTR_ERR(bb);
+ size_t bb_len = 0;
/* count RMW registers as those will be handled separately */
xa_for_each(&sr->xa, idx, entry) {
@@ -208,11 +199,30 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
++count_rmw;
}
- if (count || count_rmw)
- xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+ if (count)
+ bb_len += count * 2 + 1;
+
+ if (count_rmw)
+ bb_len += count_rmw * 20 + 7;
+
+ if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+ /*
+ * Big enough to emit all of the context's 3DSTATE via
+ * xe_lrc_emit_hwe_state_instructions()
+ */
+ bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
+
+ xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
+
+ bb = xe_bb_new(gt, bb_len, false);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
if (count) {
- /* emit single LRI with all non RMW regs */
+ /*
+ * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
+ * reg + 1
+ */
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
@@ -236,7 +246,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
}
if (count_rmw) {
- /* emit MI_MATH for each RMW reg */
+ /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)