From 7e5516e60961248bbde7381038ff74bdbf6c565e Mon Sep 17 00:00:00 2001 From: Ankit Soni Date: Wed, 4 Jun 2025 06:13:24 +0000 Subject: iommu/amd: Add HATDis feature support Current AMD IOMMU assumes Host Address Translation (HAT) is always supported, and Linux kernel enables this capability by default. However, in case of emulated and virtualized IOMMU, this might not be the case. For example,current QEMU-emulated AMD vIOMMU does not support host translation for VFIO pass-through device, but the interrupt remapping support is required for x2APIC (i.e. kvm-msi-ext-dest-id is also not supported by the guest OS). This would require the guest kernel to boot with guest kernel option iommu=pt to by-pass the initialization of host (v1) table. The AMD I/O Virtualization Technology (IOMMU) Specification Rev 3.10 [1] introduces a new flag 'HATDis' in the IVHD 11h IOMMU attributes to indicate that HAT is not supported on a particular IOMMU instance. Therefore, modifies the AMD IOMMU driver to detect the new HATDis attributes, and disable host translation and switch to use guest translation if it is available. Otherwise, the driver will disable DMA translation. [1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf Reviewed-by: Suravee Suthikulpanit Signed-off-by: Ankit Soni Link: https://lore.kernel.org/r/8109b208f87b80e400c2abd24a2e44fcbc0763a5.1749016436.git.Ankit.Soni@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 6 +++++- drivers/iommu/amd/init.c | 31 +++++++++++++++++++++++++++++-- drivers/iommu/amd/iommu.c | 13 +++++++++++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 29a8864381c3..fddfad4a9009 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -43,6 +43,7 @@ extern int amd_iommu_guest_ir; extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; extern unsigned long amd_iommu_pgsize_bitmap; +extern bool amd_iommu_hatdis; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ccbab3a4811a..69291cef73f7 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -460,6 +460,9 @@ /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 +/* IOMMU HATDIS for IVHD type 11h and 40h */ +#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0 + /* IOMMU Extended Feature Register (EFR) */ #define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 @@ -558,7 +561,8 @@ struct amd_io_pgtable { }; enum protection_domain_mode { - PD_MODE_V1 = 1, + PD_MODE_NONE, + PD_MODE_V1, PD_MODE_V2, }; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9c17dfa76703..2c181dfa23e0 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -168,6 +168,9 @@ static int amd_iommu_target_ivhd_type; u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* Host (v1) page table is not supported*/ +bool amd_iommu_hatdis; + /* SNP is enabled on the system? */ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); @@ -1792,6 +1795,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { + pr_warn_once("Host Address Translation is not supported.\n"); + amd_iommu_hatdis = true; + } + early_iommu_features_init(iommu, h); break; @@ -2112,7 +2120,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) return ret; } - iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + if (ret || amd_iommu_pgtable == PD_MODE_NONE) { + /* + * Remove sysfs if DMA translation is not supported by the + * IOMMU. Do not return an error to enable IRQ remapping + * in state_next(), DTE[V, TV] must eventually be set to 0. + */ + iommu_device_sysfs_remove(&iommu->iommu); + } return pci_enable_device(iommu->dev); } @@ -2573,7 +2589,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) u32 devid; struct dev_table_entry *dev_table = pci_seg->dev_table; - if (dev_table == NULL) + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { @@ -3084,6 +3100,17 @@ static int __init early_amd_iommu_init(void) } } + if (amd_iommu_hatdis) { + /* + * Host (v1) page table is not available. Attempt to use + * Guest (v2) page table. + */ + if (amd_iommu_v2_pgtbl_supported()) + amd_iommu_pgtable = PD_MODE_V2; + else + amd_iommu_pgtable = PD_MODE_NONE; + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3117d99cf83d..8a9babd6dfa7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2424,6 +2424,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) pci_max_pasids(to_pci_dev(dev))); } + if (amd_iommu_pgtable == PD_MODE_NONE) { + pr_warn_once("%s: DMA translation not supported by iommu.\n", + __func__); + iommu_dev = ERR_PTR(-ENODEV); + goto out_err; + } + out_err: iommu_completion_wait(iommu); @@ -2511,6 +2518,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain, case PD_MODE_V2: fmt = AMD_IOMMU_V2; break; + case PD_MODE_NONE: + WARN_ON_ONCE(1); + return -EPERM; } domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev); @@ -2532,6 +2542,9 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable) static bool amd_iommu_hd_support(struct amd_iommu *iommu) { + if (amd_iommu_hatdis) + return false; + return iommu && (iommu->features & FEATURE_HDSUP); } -- cgit v1.2.3 From 025d1371cc8c852ae1b3c2916cf7403902346350 Mon Sep 17 00:00:00 2001 From: Ankit Soni Date: Wed, 4 Jun 2025 06:13:25 +0000 Subject: iommu/amd: Add efr[HATS] max v1 page table level The EFR[HATS] bits indicate maximum host translation level supported by IOMMU. Adding support to set the maximum host page table level as indicated by EFR[HATS]. If the HATS=11b (reserved), the driver will attempt to use guest page table for DMA API. Reviewed-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Signed-off-by: Ankit Soni Link: https://lore.kernel.org/r/df0f8562c2a20895cc185c86f1a02c4d826fd597.1749016436.git.Ankit.Soni@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/init.c | 16 ++++++++++++++++ drivers/iommu/amd/io_pgtable.c | 4 ++-- drivers/iommu/amd/iommu.c | 2 +- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index fddfad4a9009..0bf3744c7b3a 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -42,6 +42,7 @@ int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; +extern u8 amd_iommu_hpt_level; extern unsigned long amd_iommu_pgsize_bitmap; extern bool amd_iommu_hatdis; diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 69291cef73f7..35ee7b0648af 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -94,6 +94,7 @@ #define FEATURE_GA BIT_ULL(7) #define FEATURE_HE BIT_ULL(8) #define FEATURE_PC BIT_ULL(9) +#define FEATURE_HATS GENMASK_ULL(11, 10) #define FEATURE_GATS GENMASK_ULL(13, 12) #define FEATURE_GLX GENMASK_ULL(15, 14) #define FEATURE_GAM_VAPIC BIT_ULL(21) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 2c181dfa23e0..cadb2c735ffc 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -152,6 +152,8 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; +/* Host page table level */ +u8 amd_iommu_hpt_level; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -3049,6 +3051,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; int ret; acpi_status status; + u8 efr_hats; if (!amd_iommu_detected) return -ENODEV; @@ -3093,6 +3096,19 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; + efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); + if (efr_hats != 0x3) { + /* + * efr[HATS] bits specify the maximum host translation level + * supported, with LEVEL 4 being initial max level. + */ + amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; + } else { + pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", + efr_hats); + amd_iommu_hatdis = true; + } + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 4d308c071134..a91e71f981ef 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -125,7 +125,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, goto out; ret = false; - if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level)) goto out; *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root)); @@ -526,7 +526,7 @@ static void v1_free_pgtable(struct io_pgtable *iop) /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); + pgtable->mode > amd_iommu_hpt_level); free_sub_pt(pgtable->root, pgtable->mode, &freelist); iommu_put_pages_list(&freelist); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8a9babd6dfa7..9c67f0be2b35 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2534,7 +2534,7 @@ static int pdom_setup_pgtable(struct protection_domain *domain, static inline u64 dma_max_address(enum protection_domain_mode pgtable) { if (pgtable == PD_MODE_V1) - return ~0ULL; + return PM_LEVEL_SIZE(amd_iommu_hpt_level); /* V2 with 4/5 level page table */ return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); -- cgit v1.2.3 From c694bc8b612ddd0dd70e122a00f39cb1e2e6927f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 3 Jul 2025 08:54:33 -0700 Subject: iommu/amd: Enable PASID and ATS capabilities in the correct order Per the PCIe spec, behavior of the PASID capability is undefined if the value of the PASID Enable bit changes while the Enable bit of the function's ATS control register is Set. Unfortunately, pdev_enable_caps() does exactly that by ordering enabling ATS for the device before enabling PASID. Cc: Suravee Suthikulpanit Cc: Vasant Hegde Cc: Jason Gunthorpe Cc: Jerry Snitselaar Fixes: eda8c2860ab679 ("iommu/amd: Enable device ATS/PASID/PRI capabilities independently") Signed-off-by: Easwar Hariharan Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250703155433.6221-1-eahariha@linux.microsoft.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9c67f0be2b35..d2d1deabf7e2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -634,8 +634,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } -- cgit v1.2.3 From ad48b1dd14fb217e0a0b0af46744a3d1f6f26dea Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:07:57 +0530 Subject: iommu/amd: Refactor AMD IOMMU debugfs initial setup Rearrange initial setup of AMD IOMMU debugfs to segregate per IOMMU setup and setup which is common for all IOMMUs. This ensures that common debugfs paths (introduced in subsequent patches) are created only once instead of being created for each IOMMU. With the change, there is no need to use lock as amd_iommu_debugfs_setup() will be called only once during AMD IOMMU initialization. So remove lock acquisition in amd_iommu_debugfs_setup(). Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Suravee Suthikulpanit Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-2-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/debugfs.c | 16 +++++++--------- drivers/iommu/amd/init.c | 5 ++--- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0bf3744c7b3a..9b4b589a54b5 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -28,9 +28,9 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size); #ifdef CONFIG_AMD_IOMMU_DEBUGFS -void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +void amd_iommu_debugfs_setup(void); #else -static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +static inline void amd_iommu_debugfs_setup(void) {} #endif /* Needed for interrupt remapping */ diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 545372fcc72f..ff9520e002be 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -13,20 +13,18 @@ #include "amd_iommu.h" static struct dentry *amd_iommu_debugfs; -static DEFINE_MUTEX(amd_iommu_debugfs_lock); #define MAX_NAME_LEN 20 -void amd_iommu_debugfs_setup(struct amd_iommu *iommu) +void amd_iommu_debugfs_setup(void) { + struct amd_iommu *iommu; char name[MAX_NAME_LEN + 1]; - mutex_lock(&amd_iommu_debugfs_lock); - if (!amd_iommu_debugfs) - amd_iommu_debugfs = debugfs_create_dir("amd", - iommu_debugfs_dir); - mutex_unlock(&amd_iommu_debugfs_lock); + amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir); - snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); - iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + for_each_iommu(iommu) { + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); + iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + } } diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index cadb2c735ffc..7b5af6176de9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3419,7 +3419,6 @@ int amd_iommu_enable_faulting(unsigned int cpu) */ static int __init amd_iommu_init(void) { - struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -3433,8 +3432,8 @@ static int __init amd_iommu_init(void) } #endif - for_each_iommu(iommu) - amd_iommu_debugfs_setup(iommu); + if (!ret) + amd_iommu_debugfs_setup(); return ret; } -- cgit v1.2.3 From 7a4ee419e8c144b747a8915856e91a034d7c8f34 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:07:58 +0530 Subject: iommu/amd: Add debugfs support to dump IOMMU MMIO registers Analyzing IOMMU MMIO registers gives a view of what IOMMU is configured with on the system and is helpful to debug issues with IOMMU. eg. -> To get mmio registers value at offset 0x18 for iommu (say, iommu00) # echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio # cat /sys/kernel/debug/iommu/amd/iommu00/mmio Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-3-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/debugfs.c | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 35ee7b0648af..e3148d7ddffe 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -795,6 +795,7 @@ struct amd_iommu { #ifdef CONFIG_AMD_IOMMU_DEBUGFS /* DebugFS Info */ struct dentry *debugfs; + int dbg_mmio_offset; #endif /* IOPF support */ diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index ff9520e002be..c025b4d2398d 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -15,6 +15,48 @@ static struct dentry *amd_iommu_debugfs; #define MAX_NAME_LEN 20 +#define OFS_IN_SZ 8 + +static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_mmio_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset); + if (ret) + return ret; + + if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) { + iommu->dbg_mmio_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_mmio_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u64 value; + + if (iommu->dbg_mmio_offset < 0) { + seq_puts(m, "Please provide mmio register's offset\n"); + return 0; + } + + value = readq(iommu->mmio_base + iommu->dbg_mmio_offset); + seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio); void amd_iommu_debugfs_setup(void) { @@ -24,7 +66,12 @@ void amd_iommu_debugfs_setup(void) amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir); for_each_iommu(iommu) { + iommu->dbg_mmio_offset = -1; + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + + debugfs_create_file("mmio", 0644, iommu->debugfs, iommu, + &iommu_mmio_fops); } } -- cgit v1.2.3 From 4d9c5d5a1dc940e44084e5cab780e1646501b6c1 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:07:59 +0530 Subject: iommu/amd: Add debugfs support to dump IOMMU Capability registers IOMMU Capability registers defines capabilities of IOMMU and information needed for initialising MMIO registers and device table. This is useful to dump these registers for debugging IOMMU related issues. e.g. -> To get capability registers value at offset 0x10 for iommu (say, iommu00) # echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability # cat /sys/kernel/debug/iommu/amd/iommu00/capability Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-4-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/debugfs.c | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index e3148d7ddffe..ac61f73a110b 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -796,6 +796,7 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; int dbg_mmio_offset; + int dbg_cap_offset; #endif /* IOPF support */ diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index c025b4d2398d..f843e184a62b 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -58,6 +58,55 @@ static int iommu_mmio_show(struct seq_file *m, void *unused) } DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio); +static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_cap_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset); + if (ret) + return ret; + + /* Capability register at offset 0x14 is the last IOMMU capability register. */ + if (iommu->dbg_cap_offset > 0x14) { + iommu->dbg_cap_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_capability_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u32 value; + int err; + + if (iommu->dbg_cap_offset < 0) { + seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n"); + return 0; + } + + err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value); + if (err) { + seq_printf(m, "Not able to read capability register at 0x%x\n", + iommu->dbg_cap_offset); + return 0; + } + + seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability); + void amd_iommu_debugfs_setup(void) { struct amd_iommu *iommu; @@ -67,11 +116,14 @@ void amd_iommu_debugfs_setup(void) for_each_iommu(iommu) { iommu->dbg_mmio_offset = -1; + iommu->dbg_cap_offset = -1; snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); debugfs_create_file("mmio", 0644, iommu->debugfs, iommu, &iommu_mmio_fops); + debugfs_create_file("capability", 0644, iommu->debugfs, iommu, + &iommu_capability_fops); } } -- cgit v1.2.3 From fb3af1f4fefb78f9180446aae2834e11a6f7d134 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:08:00 +0530 Subject: iommu/amd: Add debugfs support to dump IOMMU command buffer IOMMU driver sends command to IOMMU hardware via command buffer. In cases where IOMMU hardware fails to process commands in command buffer, dumping it is a valuable input to debug the issue. IOMMU hardware processes command buffer entry at offset equals to the head pointer. Dumping just the entry at the head pointer may not always be useful. The current head may not be pointing to the entry of the command buffer which is causing the issue. IOMMU Hardware may have processed the entry and updated the head pointer. So dumping the entire command buffer gives a broad understanding of what hardware was/is doing. The command buffer dump will have all entries from start to end of the command buffer. Along with that, it will have a head and tail command buffer pointer register dump to facilitate where the IOMMU driver and hardware are in the command buffer for injecting and processing the entries respectively. Command buffer is a per IOMMU data structure. So dumping on per IOMMU basis. eg. -> To get command buffer dump for iommu (say, iommu00) #cat /sys/kernel/debug/iommu/amd/iommu00/cmdbuf Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-5-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/amd_iommu_types.h | 7 +++++++ drivers/iommu/amd/debugfs.c | 26 ++++++++++++++++++++++++++ drivers/iommu/amd/iommu.c | 7 ------- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ac61f73a110b..687542608272 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -897,6 +897,13 @@ struct dev_table_entry { }; }; +/* + * Structure defining one entry in the command buffer + */ +struct iommu_cmd { + u32 data[4]; +}; + /* * Structure to sture persistent DTE flags from IVHD */ diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index f843e184a62b..e78f6b217a7b 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -107,6 +107,30 @@ static int iommu_capability_show(struct seq_file *m, void *unused) } DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability); +static int iommu_cmdbuf_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + struct iommu_cmd *cmd; + unsigned long flag; + u32 head, tail; + int i; + + raw_spin_lock_irqsave(&iommu->lock, flag); + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + seq_printf(m, "CMD Buffer Head Offset:%d Tail Offset:%d\n", + (head >> 4) & 0x7fff, (tail >> 4) & 0x7fff); + for (i = 0; i < CMD_BUFFER_ENTRIES; i++) { + cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd)); + seq_printf(m, "%3d: %08x %08x %08x %08x\n", i, cmd->data[0], + cmd->data[1], cmd->data[2], cmd->data[3]); + } + raw_spin_unlock_irqrestore(&iommu->lock, flag); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf); + void amd_iommu_debugfs_setup(void) { struct amd_iommu *iommu; @@ -125,5 +149,7 @@ void amd_iommu_debugfs_setup(void) &iommu_mmio_fops); debugfs_create_file("capability", 0644, iommu->debugfs, iommu, &iommu_capability_fops); + debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu, + &iommu_cmdbuf_fops); } } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d2d1deabf7e2..1066cbea6c5a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -62,13 +62,6 @@ static const struct iommu_dirty_ops amd_dirty_ops; int amd_iommu_max_glx_val = -1; -/* - * general struct to manage commands send to an IOMMU - */ -struct iommu_cmd { - u32 data[4]; -}; - /* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. -- cgit v1.2.3 From 2e98940f123d9c69d4759078aea9a536244c98d3 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:08:01 +0530 Subject: iommu/amd: Add support for device id user input Dumping IOMMU data structures like device table, IRT, etc., for all devices on the system will be a lot of data dumped in a file. Also, user may want to dump and analyze these data structures just for one or few devices. So dumping IOMMU data structures like device table, IRT etc for all devices is not a good approach. Add "device id" user input to be used for dumping IOMMU data structures like device table, IRT etc in AMD IOMMU debugfs. eg. 1. # echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid # cat /sys/kernel/debug/iommu/amd/devid Output : 0000:01:00.0 2. # echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid # cat /sys/kernel/debug/iommu/amd/devid Output : 0000:01:00.0 Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-6-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/debugfs.c | 80 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index e78f6b217a7b..065b150bba15 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -16,6 +16,9 @@ static struct dentry *amd_iommu_debugfs; #define MAX_NAME_LEN 20 #define OFS_IN_SZ 8 +#define DEVID_IN_SZ 16 + +static int sbdf = -1; static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -131,6 +134,80 @@ static int iommu_cmdbuf_show(struct seq_file *m, void *unused) } DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf); +static ssize_t devid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct amd_iommu_pci_seg *pci_seg; + int seg, bus, slot, func; + struct amd_iommu *iommu; + char *srcid_ptr; + u16 devid; + int i; + + sbdf = -1; + + if (cnt >= DEVID_IN_SZ) + return -EINVAL; + + srcid_ptr = memdup_user_nul(ubuf, cnt); + if (IS_ERR(srcid_ptr)) + return PTR_ERR(srcid_ptr); + + i = sscanf(srcid_ptr, "%x:%x:%x.%x", &seg, &bus, &slot, &func); + if (i != 4) { + i = sscanf(srcid_ptr, "%x:%x.%x", &bus, &slot, &func); + if (i != 3) { + kfree(srcid_ptr); + return -EINVAL; + } + seg = 0; + } + + devid = PCI_DEVID(bus, PCI_DEVFN(slot, func)); + + /* Check if user device id input is a valid input */ + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + if (devid > pci_seg->last_bdf) { + kfree(srcid_ptr); + return -EINVAL; + } + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) { + kfree(srcid_ptr); + return -ENODEV; + } + break; + } + + if (pci_seg->id != seg) { + kfree(srcid_ptr); + return -EINVAL; + } + + sbdf = PCI_SEG_DEVID_TO_SBDF(seg, devid); + + kfree(srcid_ptr); + + return cnt; +} + +static int devid_show(struct seq_file *m, void *unused) +{ + u16 devid; + + if (sbdf >= 0) { + devid = PCI_SBDF_TO_DEVID(sbdf); + seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + } else + seq_puts(m, "No or Invalid input provided\n"); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(devid); + void amd_iommu_debugfs_setup(void) { struct amd_iommu *iommu; @@ -152,4 +229,7 @@ void amd_iommu_debugfs_setup(void) debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu, &iommu_cmdbuf_fops); } + + debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL, + &devid_fops); } -- cgit v1.2.3 From b484577824452e526191cb87f297f78dadd97dda Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:08:02 +0530 Subject: iommu/amd: Add debugfs support to dump device table IOMMU uses device table data structure to get per-device information for DMA remapping, interrupt remapping, and other functionalities. It's a valuable data structure to visualize for debugging issues related to IOMMU. eg. -> To dump device table entry for a particular device #echo 0000:c4:00.0 > /sys/kernel/debug/iommu/amd/devid #cat /sys/kernel/debug/iommu/amd/devtbl or #echo c4:00.0 > /sys/kernel/debug/iommu/amd/devid #cat /sys/kernel/debug/iommu/amd/devtbl Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-7-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/debugfs.c | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 065b150bba15..b01eea2fbe03 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -208,6 +208,53 @@ static int devid_show(struct seq_file *m, void *unused) } DEFINE_SHOW_STORE_ATTRIBUTE(devid); +static void dump_dte(struct seq_file *m, struct amd_iommu_pci_seg *pci_seg, u16 devid) +{ + struct dev_table_entry *dev_table; + struct amd_iommu *iommu; + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + seq_printf(m, "%-12s %16s %16s %16s %16s iommu\n", "DeviceId", + "QWORD[3]", "QWORD[2]", "QWORD[1]", "QWORD[0]"); + seq_printf(m, "%04x:%02x:%02x.%x ", pci_seg->id, PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + for (int i = 3; i >= 0; --i) + seq_printf(m, "%016llx ", dev_table[devid].data[i]); + seq_printf(m, "iommu%d\n", iommu->index); +} + +static int iommu_devtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 seg, devid; + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_dte(m, pci_seg, devid); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_devtbl); + void amd_iommu_debugfs_setup(void) { struct amd_iommu *iommu; @@ -232,4 +279,6 @@ void amd_iommu_debugfs_setup(void) debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL, &devid_fops); + debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_devtbl_fops); } -- cgit v1.2.3 From 349ad6d5263a6299aae64ad59d82bb5b03b478fa Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:08:03 +0530 Subject: iommu/amd: Add debugfs support to dump IRT Table In cases where we have an issue in the device interrupt path with IOMMU interrupt remapping enabled, dumping valid IRT table entries for the device is very useful and good input for debugging the issue. eg. -> To dump irte entries for a particular device #echo "c4:00.0" > /sys/kernel/debug/iommu/amd/devid #cat /sys/kernel/debug/iommu/amd/irqtbl | less or #echo "0000:c4:00.0" > /sys/kernel/debug/iommu/amd/devid #cat /sys/kernel/debug/iommu/amd/irqtbl | less Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-8-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- drivers/iommu/amd/debugfs.c | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index b01eea2fbe03..10fa217a7119 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -11,6 +11,7 @@ #include #include "amd_iommu.h" +#include "../irq_remapping.h" static struct dentry *amd_iommu_debugfs; @@ -255,6 +256,111 @@ static int iommu_devtbl_show(struct seq_file *m, void *unused) } DEFINE_SHOW_ATTRIBUTE(iommu_devtbl); +static void dump_128_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + struct irte_ga *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (struct irte_ga *)table->table; + irte = &ptr[index]; + + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !irte->lo.fields_vapic.valid) + continue; + else if (!irte->lo.fields_remap.valid) + continue; + seq_printf(m, "IRT[%04d] %016llx %016llx\n", index, irte->hi.val, irte->lo.val); + } +} + +static void dump_32_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + union irte *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (union irte *)table->table; + irte = &ptr[index]; + + if (!irte->fields.valid) + continue; + seq_printf(m, "IRT[%04d] %08x\n", index, irte->val); + } +} + +static void dump_irte(struct seq_file *m, u16 devid, struct amd_iommu_pci_seg *pci_seg) +{ + struct dev_table_entry *dev_table; + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + u16 int_tab_len; + + table = pci_seg->irq_lookup_table[devid]; + if (!table) { + seq_printf(m, "IRQ lookup table not set for %04x:%02x:%02x:%x\n", + pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + return; + } + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + int_tab_len = dev_table[devid].data[2] & DTE_INTTABLEN_MASK; + if (int_tab_len != DTE_INTTABLEN_512 && int_tab_len != DTE_INTTABLEN_2K) { + seq_puts(m, "The device's DTE contains an invalid IRT length value."); + return; + } + + seq_printf(m, "DeviceId %04x:%02x:%02x.%x\n", pci_seg->id, PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + + raw_spin_lock_irqsave(&table->lock, flags); + if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + dump_128_irte(m, table, BIT(int_tab_len >> 1)); + else + dump_32_irte(m, table, BIT(int_tab_len >> 1)); + seq_puts(m, "\n"); + raw_spin_unlock_irqrestore(&table->lock, flags); +} + +static int iommu_irqtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 devid, seg; + + if (!irq_remapping_enabled) { + seq_puts(m, "Interrupt remapping is disabled\n"); + return 0; + } + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_irte(m, devid, pci_seg); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_irqtbl); + void amd_iommu_debugfs_setup(void) { struct amd_iommu *iommu; @@ -281,4 +387,6 @@ void amd_iommu_debugfs_setup(void) &devid_fops); debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL, &iommu_devtbl_fops); + debugfs_create_file("irqtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_irqtbl_fops); } -- cgit v1.2.3 From 39215bb3b0d929f336b6c82ff1665a3377ca0d4f Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Wed, 2 Jul 2025 15:08:04 +0530 Subject: iommu/amd: Add documentation for AMD IOMMU debugfs support Add documentation describing how to use AMD IOMMU debugfs support to dump IOMMU data structures - IRT table, Device table, Registers (MMIO and Capability) and command buffer. Signed-off-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250702093804.849-9-dheerajkumar.srivastava@amd.com Signed-off-by: Will Deacon --- Documentation/ABI/testing/debugfs-amd-iommu | 114 ++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 Documentation/ABI/testing/debugfs-amd-iommu diff --git a/Documentation/ABI/testing/debugfs-amd-iommu b/Documentation/ABI/testing/debugfs-amd-iommu new file mode 100644 index 000000000000..c14b1620aec1 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-amd-iommu @@ -0,0 +1,114 @@ +What: /sys/kernel/debug/iommu/amd/iommu/mmio +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file provides read/write access for user input. Users specify the + MMIO register offset for iommu, and the file outputs the corresponding + MMIO register value of iommu + + Example: + $ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio + $ cat /sys/kernel/debug/iommu/amd/iommu00/mmio + + Output: + Offset:0x18 Value:0x000c22000003f48d + +What: /sys/kernel/debug/iommu/amd/iommu/capability +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file provides read/write access for user input. Users specify the + capability register offset for iommu, and the file outputs the + corresponding capability register value of iommu. + + Example: + $ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability + $ cat /sys/kernel/debug/iommu/amd/iommu00/capability + + Output: + Offset:0x10 Value:0x00203040 + +What: /sys/kernel/debug/iommu/amd/iommu/cmdbuf +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file is a read-only output file containing iommu command + buffer entries. + + Examples: + $ cat /sys/kernel/debug/iommu/amd/iommu/cmdbuf + + Output: + CMD Buffer Head Offset:339 Tail Offset:339 + 0: 00835001 10000001 00003c00 00000000 + 1: 00000000 30000005 fffff003 7fffffff + 2: 00835001 10000001 00003c01 00000000 + 3: 00000000 30000005 fffff003 7fffffff + 4: 00835001 10000001 00003c02 00000000 + 5: 00000000 30000005 fffff003 7fffffff + 6: 00835001 10000001 00003c03 00000000 + 7: 00000000 30000005 fffff003 7fffffff + 8: 00835001 10000001 00003c04 00000000 + 9: 00000000 30000005 fffff003 7fffffff + 10: 00835001 10000001 00003c05 00000000 + 11: 00000000 30000005 fffff003 7fffffff + [...] + +What: /sys/kernel/debug/iommu/amd/devid +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file provides read/write access for user input. Users specify the + device ID, which can be used to dump IOMMU data structures such as the + interrupt remapping table and device table. + + Example: + 1. + $ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid + $ cat /sys/kernel/debug/iommu/amd/devid + + Output: + 0000:01:00.0 + + 2. + $ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid + $ cat /sys/kernel/debug/iommu/amd/devid + + Output: + 0000:01:00.0 + +What: /sys/kernel/debug/iommu/amd/devtbl +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file is a read-only output file containing the device table entry + for the device ID provided in /sys/kernel/debug/iommu/amd/devid. + + Example: + $ cat /sys/kernel/debug/iommu/amd/devtbl + + Output: + DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu + 0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3 + +What: /sys/kernel/debug/iommu/amd/irqtbl +Date: January 2025 +Contact: Dheeraj Kumar Srivastava +Description: + This file is a read-only output file containing valid IRT table entries + for the device ID provided in /sys/kernel/debug/iommu/amd/devid. + + Example: + $ cat /sys/kernel/debug/iommu/amd/irqtbl + + Output: + DeviceId 0000:01:00.0 + IRT[0000] 0000000000000020 0000000000000241 + IRT[0001] 0000000000000020 0000000000000841 + IRT[0002] 0000000000000020 0000000000002041 + IRT[0003] 0000000000000020 0000000000008041 + IRT[0004] 0000000000000020 0000000000020041 + IRT[0005] 0000000000000020 0000000000080041 + IRT[0006] 0000000000000020 0000000000200041 + IRT[0007] 0000000000000020 0000000000800041 + [...] -- cgit v1.2.3 From 9628e5c85b1e0fd1e30d8f797ba1289de2708d3a Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 17 Jul 2025 08:03:31 +0700 Subject: iommu/amd: Wrap debugfs ABI testing symbols snippets in literal code blocks Commit 39215bb3b0d929 ("iommu/amd: Add documentation for AMD IOMMU debugfs support") documents debugfs ABI symbols for AMD IOMMU, but forgets to wrap examples snippets and their output in literal code blocks, hence Sphinx reports indentation warnings: Documentation/ABI/testing/debugfs-amd-iommu:31: ERROR: Unexpected indentation. [docutils] Documentation/ABI/testing/debugfs-amd-iommu:31: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Documentation/ABI/testing/debugfs-amd-iommu:31: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Wrap them to fix the warnings. Fixes: 39215bb3b0d9 ("iommu/amd: Add documentation for AMD IOMMU debugfs support") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250716204207.73869849@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250717010331.8941-1-bagasdotme@gmail.com Signed-off-by: Will Deacon --- Documentation/ABI/testing/debugfs-amd-iommu | 127 ++++++++++++++++------------ 1 file changed, 72 insertions(+), 55 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-amd-iommu b/Documentation/ABI/testing/debugfs-amd-iommu index c14b1620aec1..5621a66aa693 100644 --- a/Documentation/ABI/testing/debugfs-amd-iommu +++ b/Documentation/ABI/testing/debugfs-amd-iommu @@ -6,12 +6,14 @@ Description: MMIO register offset for iommu, and the file outputs the corresponding MMIO register value of iommu - Example: - $ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio - $ cat /sys/kernel/debug/iommu/amd/iommu00/mmio + Example:: + + $ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio + $ cat /sys/kernel/debug/iommu/amd/iommu00/mmio + + Output:: - Output: - Offset:0x18 Value:0x000c22000003f48d + Offset:0x18 Value:0x000c22000003f48d What: /sys/kernel/debug/iommu/amd/iommu/capability Date: January 2025 @@ -21,12 +23,14 @@ Description: capability register offset for iommu, and the file outputs the corresponding capability register value of iommu. - Example: - $ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability - $ cat /sys/kernel/debug/iommu/amd/iommu00/capability + Example:: + + $ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability + $ cat /sys/kernel/debug/iommu/amd/iommu00/capability + + Output:: - Output: - Offset:0x10 Value:0x00203040 + Offset:0x10 Value:0x00203040 What: /sys/kernel/debug/iommu/amd/iommu/cmdbuf Date: January 2025 @@ -35,24 +39,26 @@ Description: This file is a read-only output file containing iommu command buffer entries. - Examples: - $ cat /sys/kernel/debug/iommu/amd/iommu/cmdbuf - - Output: - CMD Buffer Head Offset:339 Tail Offset:339 - 0: 00835001 10000001 00003c00 00000000 - 1: 00000000 30000005 fffff003 7fffffff - 2: 00835001 10000001 00003c01 00000000 - 3: 00000000 30000005 fffff003 7fffffff - 4: 00835001 10000001 00003c02 00000000 - 5: 00000000 30000005 fffff003 7fffffff - 6: 00835001 10000001 00003c03 00000000 - 7: 00000000 30000005 fffff003 7fffffff - 8: 00835001 10000001 00003c04 00000000 - 9: 00000000 30000005 fffff003 7fffffff - 10: 00835001 10000001 00003c05 00000000 - 11: 00000000 30000005 fffff003 7fffffff - [...] + Examples:: + + $ cat /sys/kernel/debug/iommu/amd/iommu/cmdbuf + + Output:: + + CMD Buffer Head Offset:339 Tail Offset:339 + 0: 00835001 10000001 00003c00 00000000 + 1: 00000000 30000005 fffff003 7fffffff + 2: 00835001 10000001 00003c01 00000000 + 3: 00000000 30000005 fffff003 7fffffff + 4: 00835001 10000001 00003c02 00000000 + 5: 00000000 30000005 fffff003 7fffffff + 6: 00835001 10000001 00003c03 00000000 + 7: 00000000 30000005 fffff003 7fffffff + 8: 00835001 10000001 00003c04 00000000 + 9: 00000000 30000005 fffff003 7fffffff + 10: 00835001 10000001 00003c05 00000000 + 11: 00000000 30000005 fffff003 7fffffff + [...] What: /sys/kernel/debug/iommu/amd/devid Date: January 2025 @@ -63,19 +69,26 @@ Description: interrupt remapping table and device table. Example: + 1. - $ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid - $ cat /sys/kernel/debug/iommu/amd/devid + :: + + $ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid + $ cat /sys/kernel/debug/iommu/amd/devid - Output: - 0000:01:00.0 + Output:: + + 0000:01:00.0 2. - $ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid - $ cat /sys/kernel/debug/iommu/amd/devid + :: + + $ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid + $ cat /sys/kernel/debug/iommu/amd/devid + + Output:: - Output: - 0000:01:00.0 + 0000:01:00.0 What: /sys/kernel/debug/iommu/amd/devtbl Date: January 2025 @@ -84,12 +97,14 @@ Description: This file is a read-only output file containing the device table entry for the device ID provided in /sys/kernel/debug/iommu/amd/devid. - Example: - $ cat /sys/kernel/debug/iommu/amd/devtbl + Example:: + + $ cat /sys/kernel/debug/iommu/amd/devtbl - Output: - DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu - 0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3 + Output:: + + DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu + 0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3 What: /sys/kernel/debug/iommu/amd/irqtbl Date: January 2025 @@ -98,17 +113,19 @@ Description: This file is a read-only output file containing valid IRT table entries for the device ID provided in /sys/kernel/debug/iommu/amd/devid. - Example: - $ cat /sys/kernel/debug/iommu/amd/irqtbl - - Output: - DeviceId 0000:01:00.0 - IRT[0000] 0000000000000020 0000000000000241 - IRT[0001] 0000000000000020 0000000000000841 - IRT[0002] 0000000000000020 0000000000002041 - IRT[0003] 0000000000000020 0000000000008041 - IRT[0004] 0000000000000020 0000000000020041 - IRT[0005] 0000000000000020 0000000000080041 - IRT[0006] 0000000000000020 0000000000200041 - IRT[0007] 0000000000000020 0000000000800041 - [...] + Example:: + + $ cat /sys/kernel/debug/iommu/amd/irqtbl + + Output:: + + DeviceId 0000:01:00.0 + IRT[0000] 0000000000000020 0000000000000241 + IRT[0001] 0000000000000020 0000000000000841 + IRT[0002] 0000000000000020 0000000000002041 + IRT[0003] 0000000000000020 0000000000008041 + IRT[0004] 0000000000000020 0000000000020041 + IRT[0005] 0000000000000020 0000000000080041 + IRT[0006] 0000000000000020 0000000000200041 + IRT[0007] 0000000000000020 0000000000800041 + [...] -- cgit v1.2.3 From 8637afa79cfa6123f602408cfafe8c9a73620ff1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 9 Jun 2025 20:58:05 -0300 Subject: iommu/amd: Fix geometry.aperture_end for V2 tables The AMD IOMMU documentation seems pretty clear that the V2 table follows the normal CPU expectation of sign extension. This is shown in Figure 25: AMD64 Long Mode 4-Kbyte Page Address Translation Where bits Sign-Extend [63:57] == [56]. This is typical for x86 which would have three regions in the page table: lower, non-canonical, upper. The manual describes that the V1 table does not sign extend in section 2.2.4 Sharing AMD64 Processor and IOMMU Page Tables GPA-to-SPA Further, Vasant has checked this and indicates the HW has an addtional behavior that the manual does not yet describe. The AMDv2 table does not have the sign extended behavior when attached to PASID 0, which may explain why this has gone unnoticed. The iommu domain geometry does not directly support sign extended page tables. The driver should report only one of the lower/upper spaces. Solve this by removing the top VA bit from the geometry to use only the lower space. This will also make the iommu_domain work consistently on all PASID 0 and PASID != 1. Adjust dma_max_address() to remove the top VA bit. It now returns: 5 Level: Before 0x1ffffffffffffff After 0x0ffffffffffffff 4 Level: Before 0xffffffffffff After 0x7fffffffffff Fixes: 11c439a19466 ("iommu/amd/pgtbl_v2: Fix domain max address") Link: https://lore.kernel.org/all/8858d4d6-d360-4ef0-935c-bfd13ea54f42@amd.com/ Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v2-0615cc99b88a+1ce-amdv2_geo_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/amd/iommu.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 1066cbea6c5a..72cf2bf4b773 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2529,8 +2529,21 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable) if (pgtable == PD_MODE_V1) return PM_LEVEL_SIZE(amd_iommu_hpt_level); - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) -- cgit v1.2.3