From a2b4a79b88b24c49d98d45a06a014ffd22ada1a4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 10 Sep 2017 20:29:45 +0300 Subject: spi: uapi: spidev: add missing ioctl header The SPI_IOC_MESSAGE() macro references _IOC_SIZEBITS. Add linux/ioctl.h to make sure this macro is defined. This fixes the following build failure of lcdproc with the musl libc: In file included from .../sysroot/usr/include/sys/ioctl.h:7:0, from hd44780-spi.c:31: hd44780-spi.c: In function 'spi_transfer': hd44780-spi.c:89:24: error: '_IOC_SIZEBITS' undeclared (first use in this function) status = ioctl(p->fd, SPI_IOC_MESSAGE(1), &xfer); ^ Signed-off-by: Baruch Siach Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/uapi/linux/spi/spidev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index dd5f21e75805..856de39d0b89 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -23,6 +23,7 @@ #define SPIDEV_H #include <linux/types.h> +#include <linux/ioctl.h> /* User space versions of kernel symbols for SPI clocking modes, * matching -- cgit v1.2.3 From bd7a3fe770ebd8391d1c7d072ff88e9e76d063eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Sep 2017 15:07:17 +0200 Subject: USB: fix out-of-bounds in usb_set_configuration Andrey Konovalov reported a possible out-of-bounds problem for a USB interface association descriptor. He writes: It seems there's no proper size check of a USB_DT_INTERFACE_ASSOCIATION descriptor. It's only checked that the size is >= 2 in usb_parse_configuration(), so find_iad() might do out-of-bounds access to intf_assoc->bInterfaceCount. And he's right, we don't check for crazy descriptors of this type very well, so resolve this problem. Yet another issue found by syzkaller...
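The core of the fix (full diff below) is to refuse any interface association descriptor whose bLength does not cover the fixed 8-byte layout before a pointer to it is stored. A minimal stand-alone sketch of that kind of check, built only on the uapi types; the helper name is illustrative and not part of the patch:

#include <linux/usb/ch9.h>

/* Accept an IAD only if the device-supplied bLength covers the whole
 * 8-byte fixed layout, so later reads of bInterfaceCount and friends
 * cannot run past the descriptor. Returns 1 if usable, 0 otherwise.
 */
static int iad_is_sane(const struct usb_descriptor_header *header)
{
	const struct usb_interface_assoc_descriptor *d =
		(const struct usb_interface_assoc_descriptor *)header;

	return header->bDescriptorType == USB_DT_INTERFACE_ASSOCIATION &&
	       d->bLength >= sizeof(*d); /* 8 bytes on the wire */
}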
Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 14 +++++++++++--- include/uapi/linux/usb/ch9.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 854c8d66cfbe..68b54bd88d1e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -643,15 +643,23 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, } else if (header->bDescriptorType == USB_DT_INTERFACE_ASSOCIATION) { + struct usb_interface_assoc_descriptor *d; + + d = (struct usb_interface_assoc_descriptor *)header; + if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) { + dev_warn(ddev, + "config %d has an invalid interface association descriptor of length %d, skipping\n", + cfgno, d->bLength); + continue; + } + if (iad_num == USB_MAXIADS) { dev_warn(ddev, "found more Interface " "Association Descriptors " "than allocated for in " "configuration %d\n", cfgno); } else { - config->intf_assoc[iad_num] = - (struct usb_interface_assoc_descriptor - *)header; + config->intf_assoc[iad_num] = d; iad_num++; } diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index ce1169af39d7..2a5d63040a0b 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -780,6 +780,7 @@ struct usb_interface_assoc_descriptor { __u8 iFunction; } __attribute__ ((packed)); +#define USB_DT_INTERFACE_ASSOCIATION_SIZE 8 /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From 62e082430ea4bb5b28909ca4375bb683931e22aa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Sep 2017 07:29:49 -0400 Subject: dm ioctl: fix alignment of event number in the device list The size of struct dm_name_list is different on 32-bit and 64-bit kernels (so "(nl + 1)" differs between 32-bit and 64-bit kernels). This mismatch caused some harmless difference in padding when using 32-bit or 64-bit kernel. Commit 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES") added reporting event number in the output of DM_LIST_DEVICES_CMD. This difference in padding makes it impossible for userspace to determine the location of the event number (the location would be different when running on 32-bit and 64-bit kernels). Fix the padding by using offsetof(struct dm_name_list, name) instead of sizeof(struct dm_name_list) to determine the location of entries. Also, the ioctl version number is incremented to 37 so that userspace can use the version number to determine that the event number is present and correctly located. In addition, a global event is now raised when a DM device is created, removed, renamed or when table is swapped, so that the user can monitor for device changes. 
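With ioctl version 4.37 the event number of each DM_LIST_DEVICES entry therefore sits at the 8-byte-aligned offset that follows the NUL-terminated name, independent of the kernel's word size. A rough userspace sketch of walking the result buffer under that layout (not part of the patch; buffer sizing, the DM_BUFFER_FULL_FLAG retry and the empty-list case where nl->dev == 0 are omitted):

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <linux/dm-ioctl.h>

static size_t align8(size_t v)
{
	return (v + 7) & ~(size_t)7;
}

/* 'nl' points at the first entry inside the DM_LIST_DEVICES result buffer. */
static void walk_name_list(struct dm_name_list *nl)
{
	for (;;) {
		size_t ev_off = align8(offsetof(struct dm_name_list, name) +
				       strlen(nl->name) + 1);
		uint32_t event_nr;

		memcpy(&event_nr, (const char *)nl + ev_off, sizeof(event_nr));
		/* ... consume nl->dev, nl->name and event_nr here ... */

		if (!nl->next)
			break;
		nl = (struct dm_name_list *)((char *)nl + nl->next);
	}
}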
Reported-by: Eugene Syromiatnikov Fixes: 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES") Cc: stable@vger.kernel.org # 4.13 Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-ioctl.c | 37 ++++++++++++++++++++++++------------- drivers/md/dm.c | 10 ++++++++-- include/uapi/linux/dm-ioctl.h | 4 ++-- 4 files changed, 35 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 24eddbdf2ab4..203144762f36 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -149,5 +149,6 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen extern atomic_t dm_global_event_nr; extern wait_queue_head_t dm_global_eventq; +void dm_issue_global_event(void); #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 8756a6850431..e52676fa9832 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -477,9 +477,13 @@ static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_si * Round up the ptr to an 8-byte boundary. */ #define ALIGN_MASK 7 +static inline size_t align_val(size_t val) +{ + return (val + ALIGN_MASK) & ~ALIGN_MASK; +} static inline void *align_ptr(void *ptr) { - return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK); + return (void *)align_val((size_t)ptr); } /* @@ -505,7 +509,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; - struct dm_name_list *nl, *old_nl = NULL; + struct dm_name_list *orig_nl, *nl, *old_nl = NULL; uint32_t *event_nr; down_write(&_hash_lock); @@ -516,17 +520,15 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += sizeof(struct dm_name_list); - needed += strlen(hc->name) + 1; - needed += ALIGN_MASK; - needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK; + needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); + needed += align_val(sizeof(uint32_t)); } } /* * Grab our output buffer. */ - nl = get_result_buffer(param, param_size, &len); + nl = orig_nl = get_result_buffer(param, param_size, &len); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; goto out; @@ -549,11 +551,16 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ strcpy(nl->name, hc->name); old_nl = nl; - event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1); + event_nr = align_ptr(nl->name + strlen(hc->name) + 1); *event_nr = dm_get_event_nr(hc->md); nl = align_ptr(event_nr + 1); } } + /* + * If mismatch happens, security may be compromised due to buffer + * overflow, so it's better to crash. + */ + BUG_ON((char *)nl - (char *)orig_nl != needed); out: up_write(&_hash_lock); @@ -1621,7 +1628,8 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para * which has a variable size, is not used by the function processing * the ioctl. 
*/ -#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT 2 /*----------------------------------------------------------------- * Implementation of open/close/ioctl on the special char @@ -1635,12 +1643,12 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) ioctl_fn fn; } _ioctls[] = { {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all}, + {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all}, {DM_LIST_DEVICES_CMD, 0, list_devices}, - {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create}, - {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove}, - {DM_DEV_RENAME_CMD, 0, dev_rename}, + {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create}, + {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove}, + {DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename}, {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend}, {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status}, {DM_DEV_WAIT_CMD, 0, dev_wait}, @@ -1869,6 +1877,9 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd); + if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT) + dm_issue_global_event(); + /* * Copy the results back to userland. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6e54145969c5..4be85324f44d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -52,6 +52,12 @@ static struct workqueue_struct *deferred_remove_workqueue; atomic_t dm_global_event_nr = ATOMIC_INIT(0); DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq); +void dm_issue_global_event(void) +{ + atomic_inc(&dm_global_event_nr); + wake_up(&dm_global_eventq); +} + /* * One of these is allocated per bio. */ @@ -1865,9 +1871,8 @@ static void event_callback(void *context) dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); - atomic_inc(&dm_global_event_nr); wake_up(&md->eventq); - wake_up(&dm_global_eventq); + dm_issue_global_event(); } /* @@ -2283,6 +2288,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) } map = __bind(md, table, &limits); + dm_issue_global_event(); out: mutex_unlock(&md->suspend_lock); diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 412c06a624c8..ccaea525340b 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -269,9 +269,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 36 +#define DM_VERSION_MINOR 37 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2017-06-09)" +#define DM_VERSION_EXTRA "-ioctl (2017-09-20)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 90caccdd8cc0215705f18b92771b449b01e2474a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Oct 2017 15:37:20 -0700 Subject: bpf: fix bpf_tail_call() x64 JIT - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. 
- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 4 ++-- include/uapi/linux/bpf.h | 2 +- kernel/bpf/core.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c9573660d51..0554e8aef4d5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ #define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..7b62df86be1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1022,7 +1022,7 @@ select_insn: struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; -- cgit v1.2.3 From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 9 Oct 2017 15:27:15 +0300 Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1' Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced support for attaching an eBPF object by an fd, with the 'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each IPT_SO_SET_REPLACE call. However this breaks subsequent iptables calls: # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT # iptables -A INPUT -s 5.6.7.8 -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. That's because iptables works by loading existing rules using IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with the replacement set. 
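After the change described in the following paragraphs, a pinned program is identified purely by its path and the kernel no longer trusts the stale '.fd'. A hedged sketch of how the iptables extension could then fill 'xt_bpf_info_v1' (field names per the uapi header; error handling omitted):

#include <string.h>
#include <linux/netfilter/xt_bpf.h>

static void fill_pinned_match(struct xt_bpf_info_v1 *info, const char *pinned)
{
	memset(info, 0, sizeof(*info));
	info->mode = XT_BPF_MODE_PATH_PINNED; /* alias of XT_BPF_MODE_FD_PINNED */
	info->fd = -1;                        /* ignored by the kernel from now on */
	strncpy(info->path, pinned, sizeof(info->path) - 1);
}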
However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number (from the initial "iptables -m bpf" invocation) - so when the 2nd invocation occurs, userspace passes a bogus fd number, which causes 'bpf_mt_check_v1' to fail. One suggested solution [1] was to hack iptables userspace, to perform an "entries fixup" immediately after IPT_SO_GET_ENTRIES, by opening a new, process-local fd per every 'xt_bpf_info_v1' entry seen. However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested deprecating the xt_bpf_info_v1 ABI dealing with pinned ebpf objects. This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given '.fd' and instead perform an in-kernel lookup for the bpf object given the provided '.path'. It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is expected to provide the path of the pinned object. Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved. References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2 [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2 Reported-by: Rafael Buchbinder Signed-off-by: Shmulik Ladkani Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/bpf.h | 5 +++++ include/uapi/linux/netfilter/xt_bpf.h | 1 + kernel/bpf/inode.c | 1 + net/netfilter/xt_bpf.c | 22 ++++++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..f1af7d63d678 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline int bpf_obj_get_user(const char __user *pathname) +{ + return -EOPNOTSUPP; +} + static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index b97725af2ac0..da161b56c79e 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -23,6 +23,7 @@ enum xt_bpf_modes { XT_BPF_MODE_FD_PINNED, XT_BPF_MODE_FD_ELF, }; +#define XT_BPF_MODE_PATH_PINNED XT_BPF_MODE_FD_PINNED struct xt_bpf_info_v1 { __u16 mode; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index e833ed914358..be1dde967208 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -363,6 +363,7 @@ out: putname(pname); return ret; } +EXPORT_SYMBOL_GPL(bpf_obj_get_user); static void bpf_evict_inode(struct inode *inode) { diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 38986a95216c..29123934887b 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) return 0; } +static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) +{ + mm_segment_t oldfs = get_fs(); + int retval, fd; + + set_fs(KERNEL_DS); + fd = bpf_obj_get_user(path); + set_fs(oldfs); + if (fd < 0) + return fd; + + retval = __bpf_mt_check_fd(fd, ret); + sys_close(fd); + return retval; +} + static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; @@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); - else if (info->mode ==
XT_BPF_MODE_FD_PINNED || - info->mode == XT_BPF_MODE_FD_ELF) + else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); + else if (info->mode == XT_BPF_MODE_PATH_PINNED) + return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } -- cgit v1.2.3 From 177ae09b5d699a5ebd1cafcee78889db968abf54 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 15 Sep 2017 20:44:06 -0400 Subject: drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. v2: only disable explicit sync in amdgpu_cs_ioctl Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++---- include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ab83dfcabb41..38027a00f8ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -705,7 +705,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8e6bb3..21e99366cab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53604af..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner) + void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner); + void *owner, + bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 10952c3e5eb6..a2282bacf960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1489,7 +1489,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; @@ -1581,7 +1582,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eb4a01c14eee..c559d76ff695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1035,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1176,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); if (shadow) amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, @@ -1644,7 +1644,7 @@ static int 
amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner); + owner, false); if (r) goto error_free; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4c6e8c482ee4..b62484af8ccb 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -91,6 +91,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From c2636dc53abd8269a0930bccd564f2f195dba729 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Thu, 22 Dec 2016 17:06:50 -0500 Subject: drm/amdgpu: add parameter to allocate high priority contexts v11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +++++++++++++++++++++++++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 ++++- 3 files changed, 70 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 75c933b1a432..52388b1b52c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,40 @@ */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); 
ctx->adev = adev; kref_init(&ctx->refcount); @@ -51,7 +78,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; @@ -100,6 +127,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -117,8 +146,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -188,11 +218,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW_SW: + case AMDGPU_CTX_PRIORITY_LOW_HW: + return AMD_SCHED_PRIORITY_LOW; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_NORMAL; + } +} + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r; uint32_t id; + enum amd_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -200,10 +249,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, r = 0; id = args->in.ctx_id; + priority = amdgpu_to_sched_priority(args->in.priority); + + if (priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: - r = amdgpu_ctx_alloc(adev, fpriv, &id); + r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f9d8f28efd16..38e622ce06de 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -115,7 +115,10 @@ struct amd_sched_backend_ops { enum amd_sched_priority { AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_NORMAL = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_NORMAL, + AMD_SCHED_PRIORITY_HIGH_SW, + AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX }; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index b62484af8ccb..94ef0af492dc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -168,13 +168,21 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_LOW_HW -1023 +#define AMDGPU_CTX_PRIORITY_LOW_SW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */ +#define AMDGPU_CTX_PRIORITY_HIGH_SW 512 +#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; /** For future use, no flags defined so far */ __u32 flags; __u32 ctx_id; - __u32 _pad; + __s32 
priority; }; union drm_amdgpu_ctx_out { -- cgit v1.2.3 From f3d19bf80d6c7bfe5922c09604a402ef176da41f Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Mon, 26 Jun 2017 16:12:10 -0400 Subject: drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use _INVALID to identify bad parameters and _UNSET to represent the lack of interest in a specific value. Signed-off-by: Andres Rodriguez Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d3d63f78bec9..29eebdc30a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -230,6 +230,8 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) case AMDGPU_CTX_PRIORITY_LOW_SW: case AMDGPU_CTX_PRIORITY_LOW_HW: return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); return AMD_SCHED_PRIORITY_INVALID; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4b528f7abbfe..52c8e5447624 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -122,7 +122,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1 + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 94ef0af492dc..7ea33b57893a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -169,6 +169,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_UNKNOWN_RESET 3 /* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_LOW_HW -1023 #define AMDGPU_CTX_PRIORITY_LOW_SW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 -- cgit v1.2.3 From 52c6a62c64fac03a434cdacf6ef671c6a9e9000f Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Mon, 26 Jun 2017 16:17:13 -0400 Subject: drm/amdgpu: add interface for editing a foreign process's priority v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE ioctls are used to set the priority of a different process in the current system. When a request is dropped, the process's contexts will be restored to the priority specified at context creation time. A request can be dropped by setting the override priority to AMDGPU_CTX_PRIORITY_UNSET. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. 
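Per the uapi additions further down in this patch (AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE, union drm_amdgpu_sched), a DRM_MASTER process would drive the new ioctl roughly as follows; 'card_fd' is its own device node, 'client_fd' is a DRM file descriptor belonging to the target process, names are illustrative and error handling is omitted:

#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>   /* include path as installed by libdrm/kernel headers */

static int override_client_priority(int card_fd, int client_fd, __s32 priority)
{
	union drm_amdgpu_sched args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
	args.in.fd = client_fd;
	args.in.priority = priority;  /* e.g. AMDGPU_CTX_PRIORITY_HIGH_SW, or
				       * AMDGPU_CTX_PRIORITY_UNSET to drop the override */

	return ioctl(card_fd, DRM_IOCTL_AMDGPU_SCHED, &args);
}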
v2: removed unused output structure v3: change refcounted interface for a regular set operation Signed-off-by: Andres Rodriguez Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 21 +----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 ++++++++++ include/uapi/drm/amdgpu_drm.h | 17 +++++ 6 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 25a95c95df14..ef9a3b6d7b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o + amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2ef24f4b56d..a78b03f65c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) @@ -220,26 +221,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: - return AMD_SCHED_PRIORITY_LOW; - case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 51841259e23f..82e8d43b235a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include #include "amdgpu.h" #include +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -1023,6 +1024,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000000000000..cd123306eda7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez + */ + +#include +#include +#include +#include "amdgpu.h" + +#include "amdgpu_vm.h" + +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW_SW: + case AMDGPU_CTX_PRIORITY_LOW_HW: + return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_INVALID; + } +} + +static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, + int fd, + enum amd_sched_priority priority) +{ + struct file *filp = fcheck(fd); + struct drm_file *file; + struct pid *pid; + struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx *ctx; + uint32_t id; + + if (!filp) + return -EINVAL; + + pid = get_pid(((struct drm_file *)filp->private_data)->pid); + + mutex_lock(&adev->ddev->filelist_mutex); + list_for_each_entry(file, &adev->ddev->filelist, lhead) { + if (file->pid != pid) + continue; + + fpriv = file->driver_priv; + idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + amdgpu_ctx_priority_override(ctx, priority); + } + mutex_unlock(&adev->ddev->filelist_mutex); + + put_pid(pid); + + return 0; +} + +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_sched *args = data; + struct amdgpu_device *adev = dev->dev_private; + enum amd_sched_priority priority; + int r; + + priority = amdgpu_to_sched_priority(args->in.priority); + if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) + return -EINVAL; + + switch (args->in.op) { + case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE: + r = amdgpu_sched_process_priority_override(adev, + args->in.fd, + priority); + break; + default: + DRM_ERROR("Invalid sched op specified: %d\n", args->in.op); + r = -EINVAL; + break; + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h new file mode 100644 
index 000000000000..b28c067d3822 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -0,0 +1,34 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez + */ + +#ifndef __AMDGPU_SCHED_H__ +#define __AMDGPU_SCHED_H__ + +#include + +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif // __AMDGPU_SCHED_H__ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 7ea33b57893a..a332de1aeb42 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -53,6 +53,7 @@ extern "C" { #define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -69,6 +70,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -227,6 +229,21 @@ union drm_amdgpu_vm { struct drm_amdgpu_vm_out out; }; +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + __s32 priority; + __u32 flags; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + /* * This is not a reliable API and you should expect it to fail for any * number of reasons and have fallback path that do not use userptr to -- cgit v1.2.3 From b9f19259b84dc648f207a46f3581d15eeaedf4b6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 19 Oct 2017 14:57:48 +0200 Subject: drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl This ioctl will allow us to purge inactive userspace buffers when the system is running out of contiguous memory. 
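From a userspace allocator's point of view the expected flow is: mark an idle BO as DONTNEED so the kernel may reclaim it, and before reuse flip it back to WILLNEED and check whether its contents survived. A rough sketch, assuming the uapi struct (whose vc4_drm.h hunk is not shown in this excerpt) carries handle/madv/retained fields; error handling omitted:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>   /* include path as installed by libdrm/kernel headers */

/* Returns 1 if the BO contents were retained, 0 if they were purged,
 * -1 on ioctl failure.
 */
static int bo_mark_willneed(int drm_fd, uint32_t handle)
{
	struct drm_vc4_gem_madvise arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.madv = VC4_MADV_WILLNEED;   /* VC4_MADV_DONTNEED marks it purgeable again */

	if (ioctl(drm_fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg))
		return -1;

	return arg.retained ? 1 : 0;
}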
For now, the purge logic is rather dumb in that it does not try to release only the amount of BO needed to meet the last CMA alloc request but instead purges all objects placed in the purgeable pool as soon as we experience a CMA allocation failure. Note that the in-kernel BO cache is always purged before the purgeable cache because those objects are known to be unused while objects marked as purgeable by a userspace application/library might have to be restored when they are marked back as unpurgeable, which can be expensive. Signed-off-by: Boris Brezillon Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20171019125748.3152-1-boris.brezillon@free-electrons.com --- drivers/gpu/drm/vc4/vc4_bo.c | 287 ++++++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/vc4/vc4_drv.c | 10 +- drivers/gpu/drm/vc4/vc4_drv.h | 30 +++++ drivers/gpu/drm/vc4/vc4_gem.c | 156 +++++++++++++++++++++- drivers/gpu/drm/vc4/vc4_plane.c | 20 +++ include/uapi/drm/vc4_drm.h | 19 +++ 6 files changed, 507 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3afdbf4bc10b..01a53ba304f8 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4) vc4->bo_labels[i].size_allocated / 1024, vc4->bo_labels[i].num_allocated); } + + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); } #ifdef CONFIG_DEBUG_FS @@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) } mutex_unlock(&vc4->bo_lock); + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); + return 0; } #endif @@ -247,6 +269,109 @@ static void vc4_bo_cache_purge(struct drm_device *dev) mutex_unlock(&vc4->bo_lock); } +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + list_add_tail(&bo->size_head, &vc4->purgeable.list); + vc4->purgeable.num++; + vc4->purgeable.size += bo->base.base.size; + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + /* list_del_init() is used here because the caller might release + * the purgeable lock in order to acquire the madv one and update the + * madv status. + * During this short period of time a user might decide to mark + * the BO as unpurgeable, and if bo->madv is set to + * VC4_MADV_DONTNEED it will try to remove the BO from the + * purgeable list which will fail if the ->next/prev fields + * are set to LIST_POISON1/LIST_POISON2 (which is what + * list_del() does). + * Re-initializing the list element guarantees that list_del() + * will work correctly even if it's a NOP. 
+ */ + list_del_init(&bo->size_head); + vc4->purgeable.num--; + vc4->purgeable.size -= bo->base.base.size; +} + +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + vc4_bo_remove_from_purgeable_pool_locked(bo); + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_purge(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + struct drm_device *dev = obj->dev; + + WARN_ON(!mutex_is_locked(&bo->madv_lock)); + WARN_ON(bo->madv != VC4_MADV_DONTNEED); + + drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); + + dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr); + bo->base.vaddr = NULL; + bo->madv = __VC4_MADV_PURGED; +} + +static void vc4_bo_userspace_cache_purge(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mutex_lock(&vc4->purgeable.lock); + while (!list_empty(&vc4->purgeable.list)) { + struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list, + struct vc4_bo, size_head); + struct drm_gem_object *obj = &bo->base.base; + size_t purged_size = 0; + + vc4_bo_remove_from_purgeable_pool_locked(bo); + + /* Release the purgeable lock while we're purging the BO so + * that other people can continue inserting things in the + * purgeable pool without having to wait for all BOs to be + * purged. + */ + mutex_unlock(&vc4->purgeable.lock); + mutex_lock(&bo->madv_lock); + + /* Since we released the purgeable pool lock before acquiring + * the BO madv one, the user may have marked the BO as WILLNEED + * and re-used it in the meantime. + * Before purging the BO we need to make sure + * - it is still marked as DONTNEED + * - it has not been re-inserted in the purgeable list + * - it is not used by HW blocks + * If one of these conditions is not met, just skip the entry. + */ + if (bo->madv == VC4_MADV_DONTNEED && + list_empty(&bo->size_head) && + !refcount_read(&bo->usecnt)) { + purged_size = bo->base.base.size; + vc4_bo_purge(obj); + } + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->purgeable.lock); + + if (purged_size) { + vc4->purgeable.purged_size += purged_size; + vc4->purgeable.purged_num++; + } + } + mutex_unlock(&vc4->purgeable.lock); +} + static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, uint32_t size, enum vc4_kernel_bo_type type) @@ -293,6 +418,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) if (!bo) return ERR_PTR(-ENOMEM); + bo->madv = VC4_MADV_WILLNEED; + refcount_set(&bo->usecnt, 0); + mutex_init(&bo->madv_lock); mutex_lock(&vc4->bo_lock); bo->label = VC4_BO_TYPE_KERNEL; vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++; @@ -330,16 +458,38 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, * CMA allocations we've got laying around and try again. */ vc4_bo_cache_purge(dev); + cma_obj = drm_gem_cma_create(dev, size); + } + if (IS_ERR(cma_obj)) { + /* + * Still not enough CMA memory, purge the userspace BO + * cache and retry. + * This is sub-optimal since we purge the whole userspace + * BO cache which forces user that want to re-use the BO to + * restore its initial content. + * Ideally, we should purge entries one by one and retry + * after each to see if CMA allocation succeeds. Or even + * better, try to find an entry with at least the same + * size. 
+ */ + vc4_bo_userspace_cache_purge(dev); cma_obj = drm_gem_cma_create(dev, size); - if (IS_ERR(cma_obj)) { - DRM_ERROR("Failed to allocate from CMA:\n"); - vc4_bo_stats_dump(vc4); - return ERR_PTR(-ENOMEM); - } + } + + if (IS_ERR(cma_obj)) { + DRM_ERROR("Failed to allocate from CMA:\n"); + vc4_bo_stats_dump(vc4); + return ERR_PTR(-ENOMEM); } bo = to_vc4_bo(&cma_obj->base); + /* By default, BOs do not support the MADV ioctl. This will be enabled + * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB + * BOs). + */ + bo->madv = __VC4_MADV_NOTSUPP; + mutex_lock(&vc4->bo_lock); vc4_bo_set_label(&cma_obj->base, type); mutex_unlock(&vc4->bo_lock); @@ -365,6 +515,8 @@ int vc4_dumb_create(struct drm_file *file_priv, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -403,6 +555,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo) struct vc4_bo *bo = to_vc4_bo(gem_bo); struct list_head *cache_list; + /* Remove the BO from the purgeable list. */ + mutex_lock(&bo->madv_lock); + if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt)) + vc4_bo_remove_from_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->bo_lock); /* If the object references someone else's memory, we can't cache it. */ @@ -418,7 +576,8 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } /* If this object was partially constructed but CMA allocation - * had failed, just free it. + * had failed, just free it. Can also happen when the BO has been + * purged. */ if (!bo->base.vaddr) { vc4_bo_destroy(bo); @@ -437,6 +596,10 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + /* Reset madv and usecnt before adding the BO to the cache. */ + bo->madv = __VC4_MADV_NOTSUPP; + refcount_set(&bo->usecnt, 0); + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); @@ -461,6 +624,56 @@ static void vc4_bo_cache_time_work(struct work_struct *work) mutex_unlock(&vc4->bo_lock); } +int vc4_bo_inc_usecnt(struct vc4_bo *bo) +{ + int ret; + + /* Fast path: if the BO is already retained by someone, no need to + * check the madv status. + */ + if (refcount_inc_not_zero(&bo->usecnt)) + return 0; + + mutex_lock(&bo->madv_lock); + switch (bo->madv) { + case VC4_MADV_WILLNEED: + refcount_inc(&bo->usecnt); + ret = 0; + break; + case VC4_MADV_DONTNEED: + /* We shouldn't use a BO marked as purgeable if at least + * someone else retained its content by incrementing usecnt. + * Luckily the BO hasn't been purged yet, but something wrong + * is happening here. Just throw an error instead of + * authorizing this use case. + */ + case __VC4_MADV_PURGED: + /* We can't use a purged BO. */ + default: + /* Invalid madv value. */ + ret = -EINVAL; + break; + } + mutex_unlock(&bo->madv_lock); + + return ret; +} + +void vc4_bo_dec_usecnt(struct vc4_bo *bo) +{ + /* Fast path: if the BO is still retained by someone, no need to test + * the madv value. 
+ */ + if (refcount_dec_not_one(&bo->usecnt)) + return; + + mutex_lock(&bo->madv_lock); + if (refcount_dec_and_test(&bo->usecnt) && + bo->madv == VC4_MADV_DONTNEED) + vc4_bo_add_to_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); +} + static void vc4_bo_cache_time_timer(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; @@ -480,18 +693,52 @@ struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { struct vc4_bo *bo = to_vc4_bo(obj); + struct dma_buf *dmabuf; + int ret; if (bo->validated_shader) { DRM_DEBUG("Attempting to export shader BO\n"); return ERR_PTR(-EINVAL); } - return drm_gem_prime_export(dev, obj, flags); + /* Note: as soon as the BO is exported it becomes unpurgeable, because + * noone ever decrements the usecnt even if the reference held by the + * exported BO is released. This shouldn't be a problem since we don't + * expect exported BOs to be marked as purgeable. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) { + DRM_ERROR("Failed to increment BO usecnt\n"); + return ERR_PTR(ret); + } + + dmabuf = drm_gem_prime_export(dev, obj, flags); + if (IS_ERR(dmabuf)) + vc4_bo_dec_usecnt(bo); + + return dmabuf; +} + +int vc4_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct vc4_bo *bo = to_vc4_bo(obj); + + /* The only reason we would end up here is when user-space accesses + * BO's memory after it's been purged. + */ + mutex_lock(&bo->madv_lock); + WARN_ON(bo->madv != __VC4_MADV_PURGED); + mutex_unlock(&bo->madv_lock); + + return VM_FAULT_SIGBUS; } int vc4_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_gem_object *gem_obj; + unsigned long vm_pgoff; struct vc4_bo *bo; int ret; @@ -507,16 +754,36 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } + if (bo->madv != VC4_MADV_WILLNEED) { + DRM_DEBUG("mmaping of %s BO not allowed\n", + bo->madv == VC4_MADV_DONTNEED ? + "purgeable" : "purged"); + return -EINVAL; + } + /* * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map * the whole buffer. */ vma->vm_flags &= ~VM_PFNMAP; - vma->vm_pgoff = 0; + /* This ->vm_pgoff dance is needed to make all parties happy: + * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated + * mem-region, hence the need to set it to zero (the value set by + * the DRM core is a virtual offset encoding the GEM object-id) + * - the mmap() core logic needs ->vm_pgoff to be restored to its + * initial value before returning from this function because it + * encodes the offset of this GEM in the dev->anon_inode pseudo-file + * and this information will be used when we invalidate userspace + * mappings with drm_vma_node_unmap() (called from vc4_gem_purge()). 
+ */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr, bo->base.paddr, vma->vm_end - vma->vm_start); + vma->vm_pgoff = vm_pgoff; + if (ret) drm_gem_vm_close(vma); @@ -580,6 +847,8 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -633,6 +902,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + if (copy_from_user(bo->base.vaddr, (void __user *)(uintptr_t)args->data, args->size)) { diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 1c96edcb302b..e3c29729da2e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: + case DRM_VC4_PARAM_SUPPORTS_MADVISE: args->value = true; break; default: @@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_device *dev) drm_fbdev_cma_restore_mode(vc4->fbdev); } +static const struct vm_operations_struct vc4_vm_ops = { + .fault = vc4_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + static const struct file_operations vc4_drm_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver = { .gem_create_object = vc4_create_object, .gem_free_object_unlocked = vc4_free_object, - .gem_vm_ops = &drm_gem_cma_vm_ops, + .gem_vm_ops = &vc4_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 87f2d8e5c134..9c0d380c96f2 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -74,6 +74,19 @@ struct vc4_dev { /* Protects bo_cache and bo_labels. */ struct mutex bo_lock; + /* Purgeable BO pool. All BOs in this pool can have their memory + * reclaimed if the driver is unable to allocate new BOs. We also + * keep stats related to the purge mechanism here. + */ + struct { + struct list_head list; + unsigned int num; + size_t size; + unsigned int purged_num; + size_t purged_size; + struct mutex lock; + } purgeable; + uint64_t dma_fence_context; /* Sequence number for the last job queued in bin_job_list. @@ -192,6 +205,16 @@ struct vc4_bo { * for user-allocated labels. */ int label; + + /* Count the number of active users. This is needed to determine + * whether we can move the BO to the purgeable list or not (when the BO + * is used by the GPU or the display engine we can't purge it). 
+ */ + refcount_t usecnt; + + /* Store purgeable/purged state here */ + u32 madv; + struct mutex madv_lock; }; static inline struct vc4_bo * @@ -503,6 +526,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_fault(struct vm_fault *vmf); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); @@ -513,6 +537,10 @@ void *vc4_prime_vmap(struct drm_gem_object *obj); int vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); +int vc4_bo_inc_usecnt(struct vc4_bo *bo); +void vc4_bo_dec_usecnt(struct vc4_bo *bo); +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo); +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo); /* vc4_crtc.c */ extern struct platform_driver vc4_crtc_driver; @@ -557,6 +585,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4); int vc4_queue_seqno_cb(struct drm_device *dev, struct vc4_seqno_cb *cb, uint64_t seqno, void (*func)(struct vc4_seqno_cb *cb)); +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* vc4_hdmi.c */ extern struct platform_driver vc4_hdmi_driver; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index d0c6bfb68c4e..e00ac2f3a264 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *dev) continue; for (j = 0; j < exec[i]->bo_count; j++) { + bo = to_vc4_bo(&exec[i]->bo[j]->base); + + /* Retain BOs just in case they were marked purgeable. + * This prevents the BO from being purged before + * someone had a chance to dump the hang state. + */ + WARN_ON(!refcount_read(&bo->usecnt)); + refcount_inc(&bo->usecnt); drm_gem_object_get(&exec[i]->bo[j]->base); kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; } list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { + /* No need to retain BOs coming from the ->unref_list + * because they are naturally unpurgeable. + */ drm_gem_object_get(&bo->base.base); kernel_state->bo[j + prev_idx] = &bo->base.base; j++; @@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev) state->fdbgs = V3D_READ(V3D_FDBGS); state->errstat = V3D_READ(V3D_ERRSTAT); + /* We need to turn purgeable BOs into unpurgeable ones so that + * userspace has a chance to dump the hang state before the kernel + * decides to purge those BOs. + * Note that BO consistency at dump time cannot be guaranteed. For + * example, if the owner of these BOs decides to re-use them or mark + * them purgeable again there's nothing we can do to prevent it. + */ + for (i = 0; i < kernel_state->user_state.bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); + + if (bo->madv == __VC4_MADV_NOTSUPP) + continue; + + mutex_lock(&bo->madv_lock); + if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) + bo->madv = VC4_MADV_WILLNEED; + refcount_dec(&bo->usecnt); + mutex_unlock(&bo->madv_lock); + } + spin_lock_irqsave(&vc4->job_lock, irqflags); if (vc4->hang_state) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); @@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, * The command validator needs to reference BOs by their index within * the submitted job's BO list. 
This does the validation of the job's * BO list and reference counting for the lifetime of the job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at vc4_complete_exec() time. */ static int vc4_cl_lookup_bos(struct drm_device *dev, @@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev, DRM_DEBUG("Failed to look up GEM BO %d: %d\n", i, handles[i]); ret = -EINVAL; - spin_unlock(&file_priv->table_lock); - goto fail; + break; } + drm_gem_object_get(bo); exec->bo[i] = (struct drm_gem_cma_object *)bo; } spin_unlock(&file_priv->table_lock); + if (ret) + goto fail_put_bo; + + for (i = 0; i < exec->bo_count; i++) { + ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base)); + if (ret) + goto fail_dec_usecnt; + } + + kvfree(handles); + return 0; + +fail_dec_usecnt: + /* Decrease usecnt on acquired objects. + * We cannot rely on vc4_complete_exec() to release resources here, + * because vc4_complete_exec() has no information about which BO has + * had its ->usecnt incremented. + * To make things easier we just free everything explicitly and set + * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' + * step. + */ + for (i-- ; i >= 0; i--) + vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base)); + +fail_put_bo: + /* Release any reference to acquired objects. */ + for (i = 0; i < exec->bo_count && exec->bo[i]; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + fail: kvfree(handles); + kvfree(exec->bo); + exec->bo = NULL; return ret; } @@ -833,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) dma_fence_signal(exec->fence); if (exec->bo) { - for (i = 0; i < exec->bo_count; i++) + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + vc4_bo_dec_usecnt(bo); drm_gem_object_put_unlocked(&exec->bo[i]->base); + } kvfree(exec->bo); } @@ -1098,6 +1161,9 @@ vc4_gem_init(struct drm_device *dev) INIT_WORK(&vc4->job_done_work, vc4_job_done_work); mutex_init(&vc4->power_lock); + + INIT_LIST_HEAD(&vc4->purgeable.list); + mutex_init(&vc4->purgeable.lock); } void @@ -1121,3 +1187,81 @@ vc4_gem_destroy(struct drm_device *dev) if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); } + +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_gem_madvise *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + int ret; + + switch (args->madv) { + case VC4_MADV_DONTNEED: + case VC4_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_vc4_bo(gem_obj); + + /* Only BOs exposed to userspace can be purged. */ + if (bo->madv == __VC4_MADV_NOTSUPP) { + DRM_DEBUG("madvise not supported on this BO\n"); + ret = -EINVAL; + goto out_put_gem; + } + + /* Not sure it's safe to purge imported BOs. Let's just assume it's + * not until proven otherwise. + */ + if (gem_obj->import_attach) { + DRM_DEBUG("madvise not supported on imported BOs\n"); + ret = -EINVAL; + goto out_put_gem; + } + + mutex_lock(&bo->madv_lock); + + if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && + !refcount_read(&bo->usecnt)) { + /* If the BO is about to be marked as purgeable, is not used + * and is not already purgeable or purged, add it to the + * purgeable list. 
+ */ + vc4_bo_add_to_purgeable_pool(bo); + } else if (args->madv == VC4_MADV_WILLNEED && + bo->madv == VC4_MADV_DONTNEED && + !refcount_read(&bo->usecnt)) { + /* The BO has not been purged yet, just remove it from + * the purgeable list. + */ + vc4_bo_remove_from_purgeable_pool(bo); + } + + /* Save the purged state. */ + args->retained = bo->madv != __VC4_MADV_PURGED; + + /* Update internal madv state only if the bo was not purged. */ + if (bo->madv != __VC4_MADV_PURGED) + bo->madv = args->madv; + + mutex_unlock(&bo->madv_lock); + + ret = 0; + +out_put_gem: + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 3a767a038f72..423a23ed8fc2 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -23,6 +23,7 @@ #include #include +#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" #include "vc4_regs.h" @@ -774,21 +775,40 @@ static int vc4_prepare_fb(struct drm_plane *plane, { struct vc4_bo *bo; struct dma_fence *fence; + int ret; if ((plane->state->fb == state->fb) || !state->fb) return 0; bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + fence = reservation_object_get_excl_rcu(bo->resv); drm_atomic_set_fence_for_plane(state, fence); return 0; } +static void vc4_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_bo *bo; + + if (plane->state->fb == state->fb || !state->fb) + return; + + bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + vc4_bo_dec_usecnt(bo); +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_check = vc4_plane_atomic_check, .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, + .cleanup_fb = vc4_cleanup_fb, }; static void vc4_plane_destroy(struct drm_plane *plane) diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index afae87004963..52263b575bdc 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -41,6 +41,7 @@ extern "C" { #define DRM_VC4_SET_TILING 0x08 #define DRM_VC4_GET_TILING 0x09 #define DRM_VC4_LABEL_BO 0x0a +#define DRM_VC4_GEM_MADVISE 0x0b #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -53,6 +54,7 @@ extern "C" { #define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling) #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) #define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) +#define DRM_IOCTL_VC4_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise) struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -305,6 +307,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 +#define DRM_VC4_PARAM_SUPPORTS_MADVISE 7 struct drm_vc4_get_param { __u32 param; @@ -333,6 +336,22 @@ struct drm_vc4_label_bo { __u64 name; }; +/* + * States prefixed with '__' are internal states and cannot be passed to the + * DRM_IOCTL_VC4_GEM_MADVISE ioctl. 
+ */ +#define VC4_MADV_WILLNEED 0 +#define VC4_MADV_DONTNEED 1 +#define __VC4_MADV_PURGED 2 +#define __VC4_MADV_NOTSUPP 3 + +struct drm_vc4_gem_madvise { + __u32 handle; + __u32 madv; + __u32 retained; + __u32 pad; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 8bc4c256f4995d315eb9cce6e47b4885c79ff661 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 13 Oct 2017 14:58:14 -0400 Subject: drm/amdgpu: rename context priority levels Don't leak implementation details about how each priority behaves to usermode. This allows greater flexibility in the future. Squash into c2636dc53abd8269a0930bccd564f2f195dba729 Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 ++++---- include/uapi/drm/amdgpu_drm.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index cd123306eda7..290cc3f9c433 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -32,14 +32,14 @@ enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: + case AMDGPU_CTX_PRIORITY_VERY_HIGH: return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: + case AMDGPU_CTX_PRIORITY_HIGH: return AMD_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: + case AMDGPU_CTX_PRIORITY_LOW: + case AMDGPU_CTX_PRIORITY_VERY_LOW: return AMD_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_UNSET: return AMD_SCHED_PRIORITY_UNSET; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a332de1aeb42..d0a3ea69eab6 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -172,12 +172,12 @@ union drm_amdgpu_bo_list { /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 -#define AMDGPU_CTX_PRIORITY_LOW_HW -1023 -#define AMDGPU_CTX_PRIORITY_LOW_SW -512 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 /* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */ -#define AMDGPU_CTX_PRIORITY_HIGH_SW 512 -#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023 +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ -- cgit v1.2.3 From 1f7251b73e08395dbf03602a57ca67cf7da7f9db Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 17:53:06 +0200 Subject: drm/amdgpu: add VRAM lost query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows userspace to figure out if VRAM was lost. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index ff1a416a66c9..6f0b26dae3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -763,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } return copy_to_user(out, &ui32, min(size, 4u)) ? 
-EFAULT : 0; } + case AMDGPU_INFO_VRAM_LOST_COUNTER: + ui32 = atomic_read(&adev->vram_lost_counter); + return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d0a3ea69eab6..ff0181829f3d 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -657,6 +657,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff -- cgit v1.2.3 From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 19 Oct 2017 13:30:15 -0400 Subject: membarrier: Provide register expedited private command This introduces a "register private expedited" membarrier command which allows eventual removal of important memory barrier constraints on the scheduler fast-paths. It changes how the "private expedited" membarrier command (new to 4.14) is used from user-space. This new command allows processes to register their intent to use the private expedited command. This affects how the expedited private command introduced in 4.14-rc is meant to be used, and should be merged before 4.14 final. Processes are now required to register before using MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM. This fixes a problem that arose when designing requested extensions to sys_membarrier() to allow JITs to efficiently flush old code from instruction caches. Several potential algorithms are much less painful if the user register intent to use this functionality early on, for example, before the process spawns the second thread. Registering at this time removes the need to interrupt each and every thread in that process at the first expedited sys_membarrier() system call. Signed-off-by: Mathieu Desnoyers Acked-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Viro Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/mm_types.h | 3 +++ include/linux/sched/mm.h | 16 ++++++++++++++++ include/uapi/linux/membarrier.h | 23 ++++++++++++++++------- kernel/sched/membarrier.c | 34 ++++++++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/fs/exec.c b/fs/exec.c index 5470d3c1892a..3e14ba25f678 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 46f4ecf5479a..1861ea8dba77 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -445,6 +445,9 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ +#ifdef CONFIG_MEMBARRIER + atomic_t membarrier_state; +#endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae53e413fb13..ab9bf7b73954 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_MEMBARRIER +enum { + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), + MEMBARRIER_STATE_SWITCH_MM = (1U << 1), +}; + +static inline void membarrier_execve(struct task_struct *t) +{ + atomic_set(&t->mm->membarrier_state, 0); +} +#else +static inline void membarrier_execve(struct task_struct *t) +{ +} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index 6d47b3249d8a..4e01ad7ffe98 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -52,21 +52,30 @@ * (non-running threads are de facto in such a * state). This only covers threads from the * same processes as the caller thread. This - * command returns 0. The "expedited" commands - * complete faster than the non-expedited ones, - * they never block, but have the downside of - * causing extra overhead. + * command returns 0 on success. The + * "expedited" commands complete faster than + * the non-expedited ones, they never block, + * but have the downside of causing extra + * overhead. A process needs to register its + * intent to use the private expedited command + * prior to using it, otherwise this command + * returns -EPERM. + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + * Register the process intent to use + * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always + * returns 0. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. 
*/ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ - MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a92fddc22747..dd7908743dab 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "sched.h" /* for cpu_rq(). */ @@ -26,21 +27,26 @@ * except MEMBARRIER_CMD_QUERY. */ #define MEMBARRIER_CMD_BITMASK \ - (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) + (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ } -static void membarrier_private_expedited(void) +static int membarrier_private_expedited(void) { int cpu; bool fallback = false; cpumask_var_t tmpmask; + if (!(atomic_read(¤t->mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) + return -EPERM; + if (num_online_cpus() == 1) - return; + return 0; /* * Matches memory barriers around rq->curr modification in @@ -94,6 +100,24 @@ static void membarrier_private_expedited(void) * rq->curr modification in scheduler. */ smp_mb(); /* exit from system call is not a mb */ + return 0; +} + +static void membarrier_register_private_expedited(void) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + /* + * We need to consider threads belonging to different thread + * groups, which use the same mm. (CLONE_VM but not + * CLONE_THREAD). + */ + if (atomic_read(&mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) + return; + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, + &mm->membarrier_state); } /** @@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) synchronize_sched(); return 0; case MEMBARRIER_CMD_PRIVATE_EXPEDITED: - membarrier_private_expedited(); + return membarrier_private_expedited(); + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + membarrier_register_private_expedited(); return 0; default: return -EINVAL; -- cgit v1.2.3 From 56e0349f384cbadf3b939cbcebe6e1560513246e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 20 Oct 2017 10:52:18 +1000 Subject: amdgpu: add padding to the fence to handle ioctl. I don't think this ioctl is in a Linus release yet. 
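The reason explicit padding matters here is the usual one for DRM uapi: the ioctl number encodes the struct size, and a struct whose natural size differs between 32-bit and 64-bit ABIs, or that ends in unnamed compiler padding, yields mismatched ioctl numbers and leaves bytes whose meaning the two sides can disagree about. A stand-alone sketch of the effect is below; the struct names are illustrative stand-ins, not the real amdgpu definitions.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a fence argument that ends in a __u64. */
struct fence_like {
	uint32_t ctx_id;
	uint32_t ring;
	uint64_t seq_no;
};

struct in_args {
	struct fence_like fence;
	uint32_t what;
	uint32_t pad;	/* without this, LP64 hides 4 bytes of tail padding
			 * and ILP32 produces a smaller struct altogether */
};

/* With the explicit pad every byte has a name and the size matches
 * on all ABIs. */
_Static_assert(sizeof(struct in_args) % 8 == 0,
	       "in_args ends on an 8-byte boundary");

int main(void)
{
	printf("sizeof(struct in_args) = %zu\n", sizeof(struct in_args));
	return 0;
}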
Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher Reviewed-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ff0181829f3d..919248fb4028 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -553,6 +553,7 @@ union drm_amdgpu_fence_to_handle { struct { struct drm_amdgpu_fence fence; __u32 what; + __u32 pad; } in; struct { __u32 handle; -- cgit v1.2.3 From 3064abfa932bd09faf8da01741d171d476cf7193 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 29 Jun 2017 22:49:31 -0700 Subject: drm: Add CRTC_GET_SEQUENCE and CRTC_QUEUE_SEQUENCE ioctls [v3] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These provide crtc-id based functions instead of pipe-number, while also offering higher resolution time (ns) and wider frame count (64) as required by the Vulkan API. v2: * Check for DRIVER_MODESET in new crtc-based vblank ioctls Failing to check this will oops the driver. * Ensure vblank interupt is running in crtc_get_sequence ioctl The sequence and timing values are not correct while the interrupt is off, so make sure it's running before asking for them. * Short-circuit get_sequence if the counter is enabled and accurate Steal the idea from the code in wait_vblank to avoid the expense of drm_vblank_get/put * Return active state of crtc in crtc_get_sequence ioctl Might be useful for applications that aren't in charge of modesetting? * Use drm_crtc_vblank_get/put in new crtc-based vblank sequence ioctls Daniel Vetter prefers these over the old drm_vblank_put/get APIs. * Return s64 ns instead of u64 in new sequence event Suggested-by: Daniel Vetter Suggested-by: Ville Syrjälä v3: * Removed FIRST_PIXEL_OUT_FLAG * Document that the timestamp in the query and event are that of the first pixel leaving the display engine for the display (using the same wording as the Vulkan spec). 
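To make the intended use of the two new ioctls concrete, a minimal userspace sketch follows. It assumes a DRM file descriptor and a crtc_id obtained through normal KMS enumeration and that the updated uapi header is on the include path; error handling is trimmed and the snippet is illustrative rather than part of the patch.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm.h>	/* uapi header extended by this patch */

/* Query the current sequence, then queue an event one frame later. */
static int queue_next_frame(int fd, uint32_t crtc_id)
{
	struct drm_crtc_get_sequence get;
	struct drm_crtc_queue_sequence queue;

	memset(&get, 0, sizeof(get));
	get.crtc_id = crtc_id;
	if (ioctl(fd, DRM_IOCTL_CRTC_GET_SEQUENCE, &get) < 0)
		return -1;
	/* get.sequence / get.sequence_ns now hold the most recent vblank
	 * count and the time the first pixel left the display engine. */

	memset(&queue, 0, sizeof(queue));
	queue.crtc_id = crtc_id;
	queue.flags = DRM_CRTC_SEQUENCE_RELATIVE;
	queue.sequence = 1;		/* one vblank after "now" */
	queue.user_data = 0;		/* echoed back in the event */
	if (ioctl(fd, DRM_IOCTL_CRTC_QUEUE_SEQUENCE, &queue) < 0)
		return -1;

	/* Completion is delivered on the DRM fd as a
	 * struct drm_event_crtc_sequence with base.type ==
	 * DRM_EVENT_CRTC_SEQUENCE; read() it like any other DRM event. */
	return 0;
}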
Suggested-by: Michel Dänzer Acked-by: Dave Airlie [airlied: left->leaves (Michel)] Signed-off-by: Keith Packard Reviewed-by: Sean Paul Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 6 ++ drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_vblank.c | 168 +++++++++++++++++++++++++++++++++++++++++ include/drm/drm_vblank.h | 1 + include/uapi/drm/drm.h | 36 +++++++++ 5 files changed, 213 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index edd921adcf33..c9d5a6cd4d41 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -70,6 +70,12 @@ int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data, int drm_legacy_irq_control(struct drm_device *dev, void *data, struct drm_file *file_priv); +int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + /* drm_auth.c */ int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index a78f03155466..9c435a4c0c82 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -663,6 +663,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 27b6db073a5c..13722c373a6a 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -819,6 +819,11 @@ static void send_vblank_event(struct drm_device *dev, e->event.vbl.tv_sec = tv.tv_sec; e->event.vbl.tv_usec = tv.tv_nsec / 1000; break; + case DRM_EVENT_CRTC_SEQUENCE: + if (seq) + e->event.seq.sequence = seq; + e->event.seq.time_ns = ktime_to_ns(now); + break; } trace_drm_vblank_event_delivered(e->base.file_priv, e->pipe, seq); drm_send_event_locked(dev, &e->base); @@ -1650,3 +1655,166 @@ bool drm_crtc_handle_vblank(struct drm_crtc *crtc) return drm_handle_vblank(crtc->dev, drm_crtc_index(crtc)); } EXPORT_SYMBOL(drm_crtc_handle_vblank); + +/* + * Get crtc VBLANK count. + * + * \param dev DRM device + * \param data user arguement, pointing to a drm_crtc_get_sequence structure. 
+ * \param file_priv drm file private for the user's open file descriptor + */ + +int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_crtc *crtc; + struct drm_vblank_crtc *vblank; + int pipe; + struct drm_crtc_get_sequence *get_seq = data; + ktime_t now; + bool vblank_enabled; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + if (!dev->irq_enabled) + return -EINVAL; + + crtc = drm_crtc_find(dev, file_priv, get_seq->crtc_id); + if (!crtc) + return -ENOENT; + + pipe = drm_crtc_index(crtc); + + vblank = &dev->vblank[pipe]; + vblank_enabled = dev->vblank_disable_immediate && READ_ONCE(vblank->enabled); + + if (!vblank_enabled) { + ret = drm_crtc_vblank_get(crtc); + if (ret) { + DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret); + return ret; + } + } + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state) + get_seq->active = crtc->state->enable; + else + get_seq->active = crtc->enabled; + drm_modeset_unlock(&crtc->mutex); + get_seq->sequence = drm_vblank_count_and_time(dev, pipe, &now); + get_seq->sequence_ns = ktime_to_ns(now); + if (!vblank_enabled) + drm_crtc_vblank_put(crtc); + return 0; +} + +/* + * Queue a event for VBLANK sequence + * + * \param dev DRM device + * \param data user arguement, pointing to a drm_crtc_queue_sequence structure. + * \param file_priv drm file private for the user's open file descriptor + */ + +int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_crtc *crtc; + struct drm_vblank_crtc *vblank; + int pipe; + struct drm_crtc_queue_sequence *queue_seq = data; + ktime_t now; + struct drm_pending_vblank_event *e; + u32 flags; + u64 seq; + u64 req_seq; + int ret; + unsigned long spin_flags; + + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + if (!dev->irq_enabled) + return -EINVAL; + + crtc = drm_crtc_find(dev, file_priv, queue_seq->crtc_id); + if (!crtc) + return -ENOENT; + + flags = queue_seq->flags; + /* Check valid flag bits */ + if (flags & ~(DRM_CRTC_SEQUENCE_RELATIVE| + DRM_CRTC_SEQUENCE_NEXT_ON_MISS)) + return -EINVAL; + + pipe = drm_crtc_index(crtc); + + vblank = &dev->vblank[pipe]; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + ret = drm_crtc_vblank_get(crtc); + if (ret) { + DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret); + goto err_free; + } + + seq = drm_vblank_count_and_time(dev, pipe, &now); + req_seq = queue_seq->sequence; + + if (flags & DRM_CRTC_SEQUENCE_RELATIVE) + req_seq += seq; + + if ((flags & DRM_CRTC_SEQUENCE_NEXT_ON_MISS) && vblank_passed(seq, req_seq)) + req_seq = seq + 1; + + e->pipe = pipe; + e->event.base.type = DRM_EVENT_CRTC_SEQUENCE; + e->event.base.length = sizeof(e->event.seq); + e->event.seq.user_data = queue_seq->user_data; + + spin_lock_irqsave(&dev->event_lock, spin_flags); + + /* + * drm_crtc_vblank_off() might have been called after we called + * drm_crtc_vblank_get(). drm_crtc_vblank_off() holds event_lock around the + * vblank disable, so no need for further locking. The reference from + * drm_crtc_vblank_get() protects against vblank disable from another source. 
+ */ + if (!READ_ONCE(vblank->enabled)) { + ret = -EINVAL; + goto err_unlock; + } + + ret = drm_event_reserve_init_locked(dev, file_priv, &e->base, + &e->event.base); + + if (ret) + goto err_unlock; + + e->sequence = req_seq; + + if (vblank_passed(seq, req_seq)) { + drm_crtc_vblank_put(crtc); + send_vblank_event(dev, e, seq, now); + queue_seq->sequence = seq; + } else { + /* drm_handle_vblank_events will call drm_vblank_put */ + list_add_tail(&e->base.link, &dev->vblank_event_list); + queue_seq->sequence = req_seq; + } + + spin_unlock_irqrestore(&dev->event_lock, spin_flags); + return 0; + +err_unlock: + spin_unlock_irqrestore(&dev->event_lock, spin_flags); + drm_crtc_vblank_put(crtc); +err_free: + kfree(e); + return ret; +} diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index bf8e07035a0a..848b463a0af5 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -57,6 +57,7 @@ struct drm_pending_vblank_event { union { struct drm_event base; struct drm_event_vblank vbl; + struct drm_event_crtc_sequence seq; } event; }; diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 97677cd6964d..91d83c1747c0 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -737,6 +737,28 @@ struct drm_syncobj_array { __u32 pad; }; +/* Query current scanout sequence number */ +struct drm_crtc_get_sequence { + __u32 crtc_id; /* requested crtc_id */ + __u32 active; /* return: crtc output is active */ + __u64 sequence; /* return: most recent vblank sequence */ + __s64 sequence_ns; /* return: most recent time of first pixel out */ +}; + +/* Queue event to be delivered at specified sequence. Time stamp marks + * when the first pixel of the refresh cycle leaves the display engine + * for the display + */ +#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ +#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ + +struct drm_crtc_queue_sequence { + __u32 crtc_id; + __u32 flags; + __u64 sequence; /* on input, target sequence. on output, actual sequence */ + __u64 user_data; /* user data passed to event */ +}; + #if defined(__cplusplus) } #endif @@ -819,6 +841,9 @@ extern "C" { #define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) +#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) +#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) + #define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) #define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) @@ -893,6 +918,7 @@ struct drm_event { #define DRM_EVENT_VBLANK 0x01 #define DRM_EVENT_FLIP_COMPLETE 0x02 +#define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { struct drm_event base; @@ -903,6 +929,16 @@ struct drm_event_vblank { __u32 crtc_id; /* 0 on older kernels that do not support this */ }; +/* Event delivered at sequence. 
Time stamp marks when the first pixel + * of the refresh cycle leaves the display engine for the display + */ +struct drm_event_crtc_sequence { + struct drm_event base; + __u64 user_data; + __s64 time_ns; + __u64 sequence; +}; + /* typedef area */ #ifndef __KERNEL__ typedef struct drm_clip_rect drm_clip_rect_t; -- cgit v1.2.3 From 62884cd386b876638720ef88374b31a84ca7ee5f Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 16 Mar 2017 17:56:28 -0700 Subject: drm: Add four ioctls for managing drm mode object leases [v7] drm_mode_create_lease Creates a lease for a list of drm mode objects, returning an fd for the new drm_master and a 64-bit identifier for the lessee drm_mode_list_lesees List the identifiers of the lessees for a master file drm_mode_get_lease List the leased objects for a master file drm_mode_revoke_lease Erase the set of objects managed by a lease. This should suffice to at least create and query leases. Changes for v2 as suggested by Daniel Vetter : * query ioctls only query the master associated with the provided file. * 'mask_lease' value has been removed * change ioctl has been removed. Changes for v3 suggested in part by Dave Airlie * Add revoke ioctl. Changes for v4 suggested by Dave Airlie * Expand on the comment about the magic use of &drm_lease_idr_object * Pad lease ioctl structures to align on 64-bit boundaries Changes for v5 suggested by Dave Airlie * Check for non-negative object_id in create_lease to avoid debug output from the kernel. Changes for v6 provided by Dave Airlie * For non-universal planes add primary/cursor planes to lease If we aren't exposing universal planes to this userspace client, and it requests a lease on a crtc, we should implicitly export the primary and cursor planes for the crtc. If the lessee doesn't request universal planes, it will just see the crtc, but if it does request them it will then see the plane objects as well. This also moves the object look ups earlier as a side effect, so we'd exit the ioctl quicker for non-existant objects. * Restrict leases to crtc/connector/planes. This only allows leasing for objects we wish to allow. Changes for v7 provided by Dave Airlie * Check pad args are 0 * Check create flags and object count are valid. 
* Check return from fd allocation * Refactor lease idr setup and add some simple validation * Use idr_mutex uniformly (Keith) Signed-off-by: Keith Packard Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_ioctl.c | 4 + drivers/gpu/drm/drm_lease.c | 412 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/drm_mode_object.c | 5 +- include/drm/drm_lease.h | 12 ++ include/drm/drm_mode_object.h | 2 + include/uapi/drm/drm.h | 5 + include/uapi/drm/drm_mode.h | 66 ++++++ 7 files changed, 504 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9c435a4c0c82..4aafe4802099 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -665,6 +665,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 030db1170570..d1eb56a1eff4 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -23,6 +23,8 @@ #define drm_for_each_lessee(lessee, lessor) \ list_for_each_entry((lessee), &(lessor)->lessees, lessee_list) +static uint64_t drm_lease_idr_object; + /** * drm_lease_owner - return ancestor owner drm_master * @master: drm_master somewhere within tree of lessees and lessors @@ -353,3 +355,413 @@ void drm_lease_revoke(struct drm_master *top) _drm_lease_revoke(top); mutex_unlock(&top->dev->mode_config.idr_mutex); } + +static int validate_lease(struct drm_device *dev, + struct drm_file *lessor_priv, + int object_count, + struct drm_mode_object **objects) +{ + int o; + int has_crtc = -1; + int has_connector = -1; + int has_plane = -1; + + /* we want to confirm that there is at least one crtc, plane + connector object. */ + + for (o = 0; o < object_count; o++) { + if (objects[o]->type == DRM_MODE_OBJECT_CRTC && has_crtc == -1) { + has_crtc = o; + } + if (objects[o]->type == DRM_MODE_OBJECT_CONNECTOR && has_connector == -1) + has_connector = o; + + if (lessor_priv->universal_planes) { + if (objects[o]->type == DRM_MODE_OBJECT_PLANE && has_plane == -1) + has_plane = o; + } + } + if (has_crtc == -1 || has_connector == -1) + return -EINVAL; + if (lessor_priv->universal_planes && has_plane == -1) + return -EINVAL; + return 0; +} + +static int fill_object_idr(struct drm_device *dev, + struct drm_file *lessor_priv, + struct idr *leases, + int object_count, + u32 *object_ids) +{ + struct drm_mode_object **objects; + u32 o; + int ret; + objects = kcalloc(object_count, sizeof(struct drm_mode_object *), + GFP_KERNEL); + if (!objects) + return -ENOMEM; + + /* step one - get references to all the mode objects + and check for validity. 
*/ + for (o = 0; o < object_count; o++) { + if ((int) object_ids[o] < 0) { + ret = -EINVAL; + goto out_free_objects; + } + + objects[o] = drm_mode_object_find(dev, lessor_priv, + object_ids[o], + DRM_MODE_OBJECT_ANY); + if (!objects[o]) { + ret = -ENOENT; + goto out_free_objects; + } + + if (!drm_mode_object_lease_required(objects[o]->type)) { + ret = -EINVAL; + goto out_free_objects; + } + } + + ret = validate_lease(dev, lessor_priv, object_count, objects); + if (ret) + goto out_free_objects; + + /* add their IDs to the lease request - taking into account + universal planes */ + for (o = 0; o < object_count; o++) { + struct drm_mode_object *obj = objects[o]; + u32 object_id = objects[o]->id; + DRM_DEBUG_LEASE("Adding object %d to lease\n", object_id); + + /* + * We're using an IDR to hold the set of leased + * objects, but we don't need to point at the object's + * data structure from the lease as the main crtc_idr + * will be used to actually find that. Instead, all we + * really want is a 'leased/not-leased' result, for + * which any non-NULL pointer will work fine. + */ + ret = idr_alloc(leases, &drm_lease_idr_object , object_id, object_id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + if (obj->type == DRM_MODE_OBJECT_CRTC && !lessor_priv->universal_planes) { + struct drm_crtc *crtc = obj_to_crtc(obj); + ret = idr_alloc(leases, &drm_lease_idr_object, crtc->primary->base.id, crtc->primary->base.id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object primary plane %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + if (crtc->cursor) { + ret = idr_alloc(leases, &drm_lease_idr_object, crtc->cursor->base.id, crtc->cursor->base.id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object cursor plane %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + } + } + } + + ret = 0; +out_free_objects: + for (o = 0; o < object_count; o++) { + if (objects[o]) + drm_mode_object_put(objects[o]); + } + kfree(objects); + return ret; +} + +/** + * drm_mode_create_lease_ioctl - create a new lease + * @dev: the drm device + * @data: pointer to struct drm_mode_create_lease + * @file_priv: the file being manipulated + * + * The master associated with the specified file will have a lease + * created containing the objects specified in the ioctl structure. + * A file descriptor will be allocated for that and returned to the + * application. 
+ */ +int drm_mode_create_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_create_lease *cl = data; + size_t object_count; + int ret = 0; + struct idr leases; + struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessee = NULL; + struct file *lessee_file = NULL; + struct file *lessor_file = lessor_priv->filp; + struct drm_file *lessee_priv; + int fd = -1; + uint32_t *object_ids; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + /* Do not allow sub-leases */ + if (lessor->lessor) + return -EINVAL; + + /* need some objects */ + if (cl->object_count == 0) + return -EINVAL; + + if (cl->flags && (cl->flags & ~(O_CLOEXEC | O_NONBLOCK))) + return -EINVAL; + + object_count = cl->object_count; + + object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32)); + if (IS_ERR(object_ids)) + return PTR_ERR(object_ids); + + idr_init(&leases); + + /* fill and validate the object idr */ + ret = fill_object_idr(dev, lessor_priv, &leases, + object_count, object_ids); + kfree(object_ids); + if (ret) { + idr_destroy(&leases); + return ret; + } + + /* Allocate a file descriptor for the lease */ + fd = get_unused_fd_flags(cl->flags & (O_CLOEXEC | O_NONBLOCK)); + if (fd < 0) { + idr_destroy(&leases); + return fd; + } + + DRM_DEBUG_LEASE("Creating lease\n"); + lessee = drm_lease_create(lessor, &leases); + + if (IS_ERR(lessee)) { + ret = PTR_ERR(lessee); + goto out_leases; + } + + /* Clone the lessor file to create a new file for us */ + DRM_DEBUG_LEASE("Allocating lease file\n"); + path_get(&lessor_file->f_path); + lessee_file = alloc_file(&lessor_file->f_path, + lessor_file->f_mode, + fops_get(lessor_file->f_inode->i_fop)); + + if (IS_ERR(lessee_file)) { + ret = PTR_ERR(lessee_file); + goto out_lessee; + } + + /* Initialize the new file for DRM */ + DRM_DEBUG_LEASE("Initializing the file with %p\n", lessee_file->f_op->open); + ret = lessee_file->f_op->open(lessee_file->f_inode, lessee_file); + if (ret) + goto out_lessee_file; + + lessee_priv = lessee_file->private_data; + + /* Change the file to a master one */ + drm_master_put(&lessee_priv->master); + lessee_priv->master = lessee; + lessee_priv->is_master = 1; + lessee_priv->authenticated = 1; + + /* Hook up the fd */ + fd_install(fd, lessee_file); + + /* Pass fd back to userspace */ + DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id); + cl->fd = fd; + cl->lessee_id = lessee->lessee_id; + + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); + return 0; + +out_lessee_file: + fput(lessee_file); + +out_lessee: + drm_master_put(&lessee); + +out_leases: + put_unused_fd(fd); + idr_destroy(&leases); + + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret); + return ret; +} + +/** + * drm_mode_list_lessees_ioctl - list lessee ids + * @dev: the drm device + * @data: pointer to struct drm_mode_list_lessees + * @lessor_priv: the file being manipulated + * + * Starting from the master associated with the specified file, + * the master with the provided lessee_id is found, and then + * an array of lessee ids associated with leases from that master + * are returned. 
+ */ + +int drm_mode_list_lessees_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_list_lessees *arg = data; + __u32 __user *lessee_ids = (__u32 __user *) (uintptr_t) (arg->lessees_ptr); + __u32 count_lessees = arg->count_lessees; + struct drm_master *lessor = lessor_priv->master, *lessee; + int count; + int ret = 0; + + if (arg->pad) + return -EINVAL; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + DRM_DEBUG_LEASE("List lessees for %d\n", lessor->lessee_id); + + mutex_lock(&dev->mode_config.idr_mutex); + + count = 0; + drm_for_each_lessee(lessee, lessor) { + /* Only list un-revoked leases */ + if (!idr_is_empty(&lessee->leases)) { + if (count_lessees > count) { + DRM_DEBUG_LEASE("Add lessee %d\n", lessee->lessee_id); + ret = put_user(lessee->lessee_id, lessee_ids + count); + if (ret) + break; + } + count++; + } + } + + DRM_DEBUG_LEASE("Lessor leases to %d\n", count); + if (ret == 0) + arg->count_lessees = count; + + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} + +/** + * drm_mode_get_lease_ioctl - list leased objects + * @dev: the drm device + * @data: pointer to struct drm_mode_get_lease + * @file_priv: the file being manipulated + * + * Return the list of leased objects for the specified lessee + */ + +int drm_mode_get_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessee_priv) +{ + struct drm_mode_get_lease *arg = data; + __u32 __user *object_ids = (__u32 __user *) (uintptr_t) (arg->objects_ptr); + __u32 count_objects = arg->count_objects; + struct drm_master *lessee = lessee_priv->master; + struct idr *object_idr; + int count; + void *entry; + int object; + int ret = 0; + + if (arg->pad) + return -EINVAL; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + DRM_DEBUG_LEASE("get lease for %d\n", lessee->lessee_id); + + mutex_lock(&dev->mode_config.idr_mutex); + + if (lessee->lessor == NULL) + /* owner can use all objects */ + object_idr = &lessee->dev->mode_config.crtc_idr; + else + /* lessee can only use allowed object */ + object_idr = &lessee->leases; + + count = 0; + idr_for_each_entry(object_idr, entry, object) { + if (count_objects > count) { + DRM_DEBUG_LEASE("adding object %d\n", object); + ret = put_user(object, object_ids + count); + if (ret) + break; + } + count++; + } + + DRM_DEBUG("lease holds %d objects\n", count); + if (ret == 0) + arg->count_objects = count; + + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} + +/** + * drm_mode_revoke_lease_ioctl - revoke lease + * @dev: the drm device + * @data: pointer to struct drm_mode_revoke_lease + * @file_priv: the file being manipulated + * + * This removes all of the objects from the lease without + * actually getting rid of the lease itself; that way all + * references to it still work correctly + */ +int drm_mode_revoke_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_revoke_lease *arg = data; + struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessee; + int ret = 0; + + DRM_DEBUG_LEASE("revoke lease for %d\n", arg->lessee_id); + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + mutex_lock(&dev->mode_config.idr_mutex); + + lessee = _drm_find_lessee(lessor, arg->lessee_id); + + /* No such lessee */ + if (!lessee) { + ret = -ENOENT; + goto fail; + } + + /* Lease is not held by 
lessor */ + if (lessee->lessor != lessor) { + ret = -EACCES; + goto fail; + } + + _drm_lease_revoke(lessee); + +fail: + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index d1599f36b605..7c8b2698c6a7 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -111,7 +111,7 @@ void drm_mode_object_unregister(struct drm_device *dev, * Returns whether the provided type of drm_mode_object must * be owned or leased to be used by a process. */ -static bool drm_lease_required(uint32_t type) +bool drm_mode_object_lease_required(uint32_t type) { switch(type) { case DRM_MODE_OBJECT_CRTC: @@ -136,7 +136,8 @@ struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, if (obj && obj->id != id) obj = NULL; - if (obj && drm_lease_required(obj->type) && !_drm_lease_held(file_priv, obj->id)) + if (obj && drm_mode_object_lease_required(obj->type) && + !_drm_lease_held(file_priv, obj->id)) obj = NULL; if (obj && obj->free_cb) { diff --git a/include/drm/drm_lease.h b/include/drm/drm_lease.h index 6149e56ddbf3..fbc0ab54855b 100644 --- a/include/drm/drm_lease.h +++ b/include/drm/drm_lease.h @@ -31,4 +31,16 @@ void drm_lease_revoke(struct drm_master *master); uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs); +int drm_mode_create_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); + +int drm_mode_list_lessees_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); + +int drm_mode_get_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); + +int drm_mode_revoke_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); + #endif /* _DRM_LEASE_H_ */ diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h index c8155cb5a932..7ba3913f30b5 100644 --- a/include/drm/drm_mode_object.h +++ b/include/drm/drm_mode_object.h @@ -154,4 +154,6 @@ int drm_object_property_get_value(struct drm_mode_object *obj, void drm_object_attach_property(struct drm_mode_object *obj, struct drm_property *property, uint64_t init_val); + +bool drm_mode_object_lease_required(uint32_t type); #endif diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 91d83c1747c0..6fdff5945c8a 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -888,6 +888,11 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) #define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) +#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) +#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) +#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) +#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 34b6bb34b002..5597a87154e5 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -782,6 +782,72 @@ struct drm_mode_destroy_blob { __u32 blob_id; }; +/** + * Lease mode resources, creating another drm_master. 
+ */ +struct drm_mode_create_lease { + /** Pointer to array of object ids (__u32) */ + __u64 object_ids; + /** Number of object ids */ + __u32 object_count; + /** flags for new FD (O_CLOEXEC, etc) */ + __u32 flags; + + /** Return: unique identifier for lessee. */ + __u32 lessee_id; + /** Return: file descriptor to new drm_master file */ + __u32 fd; +}; + +/** + * List lesses from a drm_master + */ +struct drm_mode_list_lessees { + /** Number of lessees. + * On input, provides length of the array. + * On output, provides total number. No + * more than the input number will be written + * back, so two calls can be used to get + * the size and then the data. + */ + __u32 count_lessees; + __u32 pad; + + /** Pointer to lessees. + * pointer to __u64 array of lessee ids + */ + __u64 lessees_ptr; +}; + +/** + * Get leased objects + */ +struct drm_mode_get_lease { + /** Number of leased objects. + * On input, provides length of the array. + * On output, provides total number. No + * more than the input number will be written + * back, so two calls can be used to get + * the size and then the data. + */ + __u32 count_objects; + __u32 pad; + + /** Pointer to objects. + * pointer to __u32 array of object ids + */ + __u64 objects_ptr; +}; + +/** + * Revoke lease + */ +struct drm_mode_revoke_lease { + /** Unique ID of lessee + */ + __u32 lessee_id; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From f7de15450e906ed6586b29bde609a5686cd0d034 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Fri, 20 Oct 2017 11:06:55 -0600 Subject: drm/msm: Add per-instance submit queues Currently the behavior of a command stream is provided by the user application during submission and the application is expected to internally maintain the settings for each 'context' or 'rendering queue' and specify the correct ones. This works okay for simple cases but as applications become more complex we will want to set context specific flags and do various permission checks to allow certain contexts to enable additional privileges. Add kernel-side submit queues to be analogous to 'contexts' or 'rendering queues' on the application side. Each file descriptor instance will maintain its own list of queues. Queues cannot be shared between file descriptors. For backwards compatibility context id '0' is defined as a default context specifying no priority and no special flags. This is intended to be the usual configuration for 99% of applications so that a garden variety application can function correctly without creating a queue. Only those applications requiring the specific benefit of different queues need create one. 
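As a usage illustration: the submit queue uapi additions only appear in the diffstat above, so the macro and struct names below are assumed from the new msm_drm.h and should be read as a sketch rather than a verbatim excerpt. Creating a dedicated queue and tearing it down again looks roughly like this, with error handling omitted.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "msm_drm.h"	/* uapi header extended by this patch */

static void submitqueue_demo(int fd)
{
	struct drm_msm_submitqueue req;

	memset(&req, 0, sizeof(req));
	req.flags = 0;		/* no MSM_SUBMITQUEUE_* flags requested */
	req.prio = 0;		/* default priority */

	if (ioctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req))
		return;

	/* req.id now names this file's private queue; submits reference
	 * it instead of the implicit default queue 0, and queues are
	 * never shared between file descriptors. */

	ioctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &req.id);
}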
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Makefile | 3 +- drivers/gpu/drm/msm/msm_drv.c | 56 ++++++++++++-- drivers/gpu/drm/msm/msm_drv.h | 20 +++-- drivers/gpu/drm/msm/msm_gem.h | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 14 +++- drivers/gpu/drm/msm/msm_gpu.h | 15 ++++ drivers/gpu/drm/msm/msm_submitqueue.c | 135 ++++++++++++++++++++++++++++++++++ include/uapi/drm/msm_drm.h | 22 ++++++ 8 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/msm/msm_submitqueue.c (limited to 'include/uapi') diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 33008fa1be9b..3c234e7b4742 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -57,7 +57,8 @@ msm-y := \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ - msm_ringbuffer.o + msm_ringbuffer.o \ + msm_submitqueue.o msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index d4d4146b3425..2585bebfcf3e 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -29,9 +29,12 @@ * - 1.0.0 - initial interface * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers * - 1.2.0 - adds explicit fence support for submit ioctl + * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW + + * SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for + * MSM_GEM_INFO ioctl. */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 2 +#define MSM_VERSION_MINOR 3 #define MSM_VERSION_PATCHLEVEL 0 static void msm_fb_output_poll_changed(struct drm_device *dev) @@ -504,24 +507,37 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int msm_open(struct drm_device *dev, struct drm_file *file) +static int context_init(struct drm_file *file) { struct msm_file_private *ctx; - /* For now, load gpu on open.. to avoid the requirement of having - * firmware in the initrd. - */ - load_gpu(dev); - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + msm_submitqueue_init(ctx); + file->driver_priv = ctx; return 0; } +static int msm_open(struct drm_device *dev, struct drm_file *file) +{ + /* For now, load gpu on open.. to avoid the requirement of having + * firmware in the initrd. 
+ */ + load_gpu(dev); + + return context_init(file); +} + +static void context_close(struct msm_file_private *ctx) +{ + msm_submitqueue_close(ctx); + kfree(ctx); +} + static void msm_postclose(struct drm_device *dev, struct drm_file *file) { struct msm_drm_private *priv = dev->dev_private; @@ -532,7 +548,7 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) priv->lastctx = NULL; mutex_unlock(&dev->struct_mutex); - kfree(ctx); + context_close(ctx); } static void msm_lastclose(struct drm_device *dev) @@ -777,6 +793,28 @@ unlock: return ret; } + +static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_submitqueue *args = data; + + if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) + return -EINVAL; + + return msm_submitqueue_create(file->driver_priv, args->prio, + args->flags, &args->id); +} + + +static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data, + struct drm_file *file) +{ + u32 id = *(u32 *) data; + + return msm_submitqueue_remove(file->driver_priv, id); +} + static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_AUTH|DRM_RENDER_ALLOW), @@ -786,6 +824,8 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT, msm_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE, msm_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE, msm_ioctl_gem_madvise, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct vm_operations_struct vm_ops = { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 5e8109c07560..b3b8f20f58db 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -56,11 +56,9 @@ struct msm_gem_address_space; struct msm_gem_vma; struct msm_file_private { - /* currently we don't do anything useful with this.. but when - * per-context address spaces are supported we'd keep track of - * the context's page-tables here. - */ - int dummy; + rwlock_t queuelock; + struct list_head submitqueues; + int queueid; }; enum msm_mdp_plane_property { @@ -319,6 +317,18 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); +struct msm_gpu_submitqueue; +int msm_submitqueue_init(struct msm_file_private *ctx); +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id); +int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, + u32 flags, u32 *id); +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); +void msm_submitqueue_close(struct msm_file_private *ctx); + +void msm_submitqueue_destroy(struct kref *kref); + + #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) 
if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 91c210d2359c..17f8a6c23464 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -142,6 +142,7 @@ struct msm_gem_submit { struct list_head bo_list; struct ww_acquire_ctx ticket; struct dma_fence *fence; + struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ unsigned int nr_cmds; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 93535cac0676..c196cc615e77 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -31,7 +31,8 @@ #define BO_PINNED 0x2000 static struct msm_gem_submit *submit_create(struct drm_device *dev, - struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) + struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue, + uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) + @@ -49,6 +50,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->fence = NULL; submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; + submit->queue = queue; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -66,6 +68,8 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); + msm_submitqueue_put(submit->queue); + kfree(submit); } @@ -405,6 +409,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gpu *gpu = priv->gpu; struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct msm_gpu_submitqueue *queue; int out_fence_fd = -1; unsigned i; int ret; @@ -421,6 +426,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) return -EINVAL; + queue = msm_submitqueue_get(ctx, args->queueid); + if (!queue) + return -ENOENT; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -451,7 +460,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } priv->struct_mutex_task = current; - submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); + submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; goto out_unlock; @@ -535,6 +544,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; submit->fence = msm_fence_alloc(gpu->fctx); + if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index df4e2771fb85..a890176c27ce 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -150,6 +150,15 @@ struct msm_gpu_perfcntr { const char *name; }; +struct msm_gpu_submitqueue { + int id; + u32 flags; + u32 prio; + int faults; + struct list_head node; + struct kref ref; +}; + static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { msm_writel(data, gpu->mmio + (reg << 2)); @@ -223,4 +232,10 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev); void __init adreno_register(void); void __exit adreno_unregister(void); +static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) +{ + if (queue) + kref_put(&queue->ref, msm_submitqueue_destroy); +} + #endif /* __MSM_GPU_H__ 
*/ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c new file mode 100644 index 000000000000..593c3b5f44cd --- /dev/null +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -0,0 +1,135 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include "msm_gpu.h" + +void msm_submitqueue_destroy(struct kref *kref) +{ + struct msm_gpu_submitqueue *queue = container_of(kref, + struct msm_gpu_submitqueue, ref); + + kfree(queue); +} + +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return NULL; + + read_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + kref_get(&entry->ref); + read_unlock(&ctx->queuelock); + + return entry; + } + } + + read_unlock(&ctx->queuelock); + return NULL; +} + +void msm_submitqueue_close(struct msm_file_private *ctx) +{ + struct msm_gpu_submitqueue *entry, *tmp; + + if (!ctx) + return; + + /* + * No lock needed in close and there won't + * be any more user ioctls coming our way + */ + list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node) + msm_submitqueue_put(entry); +} + +int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, + u32 *id) +{ + struct msm_gpu_submitqueue *queue; + + if (!ctx) + return -ENODEV; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + + if (!queue) + return -ENOMEM; + + kref_init(&queue->ref); + queue->flags = flags; + queue->prio = prio; + + write_lock(&ctx->queuelock); + + queue->id = ctx->queueid++; + + if (id) + *id = queue->id; + + list_add_tail(&queue->node, &ctx->submitqueues); + + write_unlock(&ctx->queuelock); + + return 0; +} + +int msm_submitqueue_init(struct msm_file_private *ctx) +{ + if (!ctx) + return 0; + + INIT_LIST_HEAD(&ctx->submitqueues); + + rwlock_init(&ctx->queuelock); + + return msm_submitqueue_create(ctx, 2, 0, NULL); +} + +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return 0; + + /* + * id 0 is the "default" queue and can't be destroyed + * by the user + */ + if (!id) + return -ENOENT; + + write_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + list_del(&entry->node); + write_unlock(&ctx->queuelock); + + msm_submitqueue_put(entry); + return 0; + } + } + + write_unlock(&ctx->queuelock); + return -ENOENT; +} + diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index ad4eb2863e70..fee44322a69c 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -218,6 +218,7 @@ struct drm_msm_gem_submit { __u64 bos; /* in, ptr to array of submit_bo's */ __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ + __u32 queueid; /* in, submitqueue id */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on @@ -254,6 +255,20 @@ struct drm_msm_gem_madvise { __u32 retained; /* out, 
whether backing store still exists */ }; +/* + * Draw queues allow the user to set specific submission parameter. Command + * submissions specify a specific submitqueue to use. ID 0 is reserved for + * backwards compatibility as a "default" submitqueue + */ + +#define MSM_SUBMITQUEUE_FLAGS (0) + +struct drm_msm_submitqueue { + __u32 flags; /* in, MSM_SUBMITQUEUE_x */ + __u32 prio; /* in, Priority level */ + __u32 id; /* out, identifier */ +}; + #define DRM_MSM_GET_PARAM 0x00 /* placeholder: #define DRM_MSM_SET_PARAM 0x01 @@ -265,6 +280,11 @@ struct drm_msm_gem_madvise { #define DRM_MSM_GEM_SUBMIT 0x06 #define DRM_MSM_WAIT_FENCE 0x07 #define DRM_MSM_GEM_MADVISE 0x08 +/* placeholder: +#define DRM_MSM_GEM_SVM_NEW 0x09 + */ +#define DRM_MSM_SUBMITQUEUE_NEW 0x0A +#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) @@ -274,6 +294,8 @@ struct drm_msm_gem_madvise { #define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) #define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) #define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise) +#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) +#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) #if defined(__cplusplus) } -- cgit v1.2.3 From f97decac5f4c2d862e5b848694e3ffb29fc8acdd Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Fri, 20 Oct 2017 11:06:57 -0600 Subject: drm/msm: Support multiple ringbuffers Add the infrastructure to support the idea of multiple ringbuffers. Assign each ringbuffer an id and use that as an index for the various ring specific operations. The biggest delta is to support legacy fences. Each fence gets its own sequence number but the legacy functions expect to use a unique integer. To handle this we return a unique identifier for each submission but map it to a specific ring/sequence under the covers. Newer users use a dma_fence pointer anyway so they don't care about the actual sequence ID or ring. The actual mechanics for multiple ringbuffers are very target specific so this code just allows for the possibility but still only defines one ringbuffer for each target family. 
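To make the per-ring fence bookkeeping described above concrete, here is an illustrative, simplified sketch (not taken from the patch; the struct is a stand-in for the msm_ringbuffer/msm_rbmemptrs fields added below): each ring remembers the last seqno it handed out and the last fence value the CP wrote back, and the GPU counts as busy while any ring is ahead of its write-back, which is the comparison the reworked msm_gpu_active() performs.

/* Illustrative sketch only; field names are simplified stand-ins. */
#include <stdbool.h>
#include <stdint.h>

struct ring_state {
	uint32_t seqno;		/* last seqno assigned to a submit on this ring */
	uint32_t completed;	/* fence value the CP wrote back (memptrs->fence) */
};

static bool gpu_active(const struct ring_state *rings, int nr_rings)
{
	for (int i = 0; i < nr_rings; i++) {
		/* per-ring version of the old last_fence > fence check */
		if (rings[i].seqno > rings[i].completed)
			return true;
	}
	return false;
}

Hangcheck and retire in the patch walk the rings the same way, comparing ring->seqno against ring->memptrs->fence instead of the old per-GPU counters.
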
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 9 +- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 9 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 54 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 2 +- drivers/gpu/drm/msm/adreno/a5xx_power.c | 6 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 136 +++-- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 20 +- drivers/gpu/drm/msm/msm_drv.c | 23 +- drivers/gpu/drm/msm/msm_drv.h | 8 +- drivers/gpu/drm/msm/msm_fence.c | 2 +- drivers/gpu/drm/msm/msm_fence.h | 2 +- drivers/gpu/drm/msm/msm_gem.h | 4 +- drivers/gpu/drm/msm/msm_gem_submit.c | 12 +- drivers/gpu/drm/msm/msm_gpu.c | 163 ++++-- drivers/gpu/drm/msm/msm_gpu.h | 42 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 34 +- drivers/gpu/drm/msm/msm_ringbuffer.h | 20 +- drivers/gpu/drm/msm/msm_submitqueue.c | 27 +- include/dt-bindings/msm/msm-bus-ids.h | 887 ++++++++++++++++++++++++++++++++ include/uapi/drm/msm_drm.h | 1 + 20 files changed, 1251 insertions(+), 210 deletions(-) create mode 100644 include/dt-bindings/msm/msm-bus-ids.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 789f7fb86cba..4baef2738178 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -446,6 +446,7 @@ static const struct adreno_gpu_funcs funcs = { .recover = a3xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index f87c43124099..8199a4b9f2fa 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -534,6 +534,7 @@ static const struct 
adreno_gpu_funcs funcs = { .recover = a4xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -573,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e34835c3b55d..32252f8ac30c 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -117,7 +117,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; for (i = 0; i < submit->nr_cmds; i++) { @@ -138,15 +138,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(gpu, fence))); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } static const struct { @@ -262,7 +262,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -293,9 +293,8 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - - return a5xx_idle(gpu) ? 0 : -EINVAL; + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 
0 : -EINVAL; } static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, @@ -581,11 +580,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -598,11 +597,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -676,18 +675,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -818,9 +818,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); - dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - gpu->memptrs->fence, + dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? 
ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), @@ -1010,6 +1011,7 @@ static const struct adreno_gpu_funcs funcs = { .recover = a5xx_recover, .submit = a5xx_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1045,7 +1047,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e94451685bf8..44db48d86202 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -55,7 +55,7 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index b5de2be67732..e5700bbf09dd 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. 
GPMU will not be active\n", gpu->name); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 5f2501c2cd3f..fd0fb0568dd3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,7 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K #define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) @@ -163,7 +162,7 @@ static int adreno_load_fw(struct adreno_gpu *adreno_gpu) int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int ret, i; DBG("%s", gpu->name); @@ -171,34 +170,42 @@ int adreno_hw_init(struct msm_gpu *gpu) if (ret) return ret; - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); - return ret; - } + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + if (!ring) + continue; - /* reset completed fence seqno: */ - gpu->memptrs->fence = gpu->fctx->completed_fence; - gpu->memptrs->rptr = 0; + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } /* Setup REG_CP_RB_CNTL: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? 
AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(gpu, rptr)); + REG_ADRENO_CP_RB_RPTR_ADDR_HI, + rbmemptr(gpu->rb[0], rptr)); } return 0; @@ -210,15 +217,19 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) } /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { - struct msm_gpu *gpu = &adreno_gpu->base; - if (adreno_is_a430(adreno_gpu)) - return gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return gpu->memptrs->rptr; + return ring->memptrs->rptr; +} + +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) +{ + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -244,7 +255,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -267,7 +278,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -283,8 +294,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(gpu, fence)); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); @@ -310,10 +321,10 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; @@ -323,7 +334,7 @@ void adreno_flush(struct msm_gpu *gpu) * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -331,17 +342,18 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to 
recover from hang? */ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name, + ring->id); return false; } @@ -356,10 +368,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", gpu->memptrs->fence, - gpu->fctx->last_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -385,16 +403,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", gpu->memptrs->fence, - gpu->fctx->last_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -417,23 +442,26 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + uint32_t wptr = get_wptr(ring); + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringubffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; @@ -460,7 +488,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 1676282948d5..3e9a1743f476 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -208,17 +208,19 @@ int adreno_hw_init(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -227,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -235,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -264,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 2585bebfcf3e..bfb8c7cf800a 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -507,7 +507,7 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int context_init(struct drm_file *file) +static int context_init(struct drm_device *dev, struct drm_file *file) { struct msm_file_private *ctx; @@ -515,7 +515,7 @@ static int context_init(struct drm_file *file) if (!ctx) return -ENOMEM; - msm_submitqueue_init(ctx); + msm_submitqueue_init(dev, ctx); file->driver_priv = ctx; @@ -529,7 +529,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file) */ load_gpu(dev); - return context_init(file); + return context_init(dev, file); } static void context_close(struct msm_file_private *ctx) @@ -743,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct msm_drm_private *priv = 
dev->dev_private; struct drm_msm_wait_fence *args = data; ktime_t timeout = to_ktime(args->timeout); + struct msm_gpu_submitqueue *queue; + struct msm_gpu *gpu = priv->gpu; + int ret; if (args->pad) { DRM_ERROR("invalid pad: %08x\n", args->pad); return -EINVAL; } - if (!priv->gpu) + if (!gpu) return 0; - return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, + true); + + msm_submitqueue_put(queue); + return ret; } static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, @@ -802,7 +813,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) return -EINVAL; - return msm_submitqueue_create(file->driver_priv, args->prio, + return msm_submitqueue_create(dev, file->driver_priv, args->prio, args->flags, &args->id); } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b3b8f20f58db..2821f572ecd8 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -74,6 +74,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 1 + struct msm_drm_private { struct drm_device *dev; @@ -318,11 +320,11 @@ void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); struct msm_gpu_submitqueue; -int msm_submitqueue_init(struct msm_file_private *ctx); +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id); -int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, - u32 flags, u32 *id); +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); void msm_submitqueue_close(struct msm_file_private *ctx); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a2f89bac9c16..349c12f670eb 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) return ERR_PTR(-ENOMEM); fctx->dev = dev; - fctx->name = name; + strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 56061aa1959d..1aa6a4c6530c 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -22,7 +22,7 @@ struct msm_fence_context { struct drm_device *dev; - const char *name; + char name[32]; unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned fence */ diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 17f8a6c23464..9320e184b48d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,13 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; + uint32_t seqno; /* Sequence number of the submit on the ring */ struct dma_fence *fence; struct 
msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index c196cc615e77..d3551aa130fb 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -51,6 +51,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; submit->queue = queue; + submit->ring = gpu->rb[queue->prio]; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -247,7 +248,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (no_implicit) continue; - ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + write); if (ret) break; } @@ -410,6 +412,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; int out_fence_fd = -1; unsigned i; int ret; @@ -430,6 +433,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!queue) return -ENOENT; + ring = gpu->rb[queue->prio]; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -440,7 +445,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * Wait if the fence is from a foreign context, or if the fence * array contains any fence from a foreign context. */ - if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { + if (!dma_fence_match_context(in_fence, ring->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; @@ -543,8 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(gpu->fctx); - + submit->fence = msm_fence_alloc(ring->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0744837ed70f..ec28c99ee36e 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -221,6 +221,20 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) @@ -228,15 +242,34 @@ static void recover_worker(struct work_struct *work) struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; struct msm_gem_submit *submit; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + uint64_t fence; + int i; + + /* Update all the rings with the latest and greatest fence */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - msm_update_fence(gpu->fctx, fence + 1); + fence = ring->memptrs->fence; + + /* + * For the current (faulting?) 
ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence = fence + 1; + + update_fences(gpu, ring, fence); + } mutex_lock(&dev->struct_mutex); + dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { + fence = cur_ring->memptrs->fence + 1; + + list_for_each_entry(submit, &cur_ring->submits, node) { + if (submit->seqno == fence) { struct task_struct *task; rcu_read_lock(); @@ -258,9 +291,16 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +321,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! */ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +470,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* Retire the commits starting with highest priority */ + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +490,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->memptrs->fence; + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; 
i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +514,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,7 +523,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; + + list_add_tail(&submit->node, &ring->submits); msm_rd_dump_submit(submit); @@ -605,7 +650,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +660,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,29 +729,47 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, goto fail; } - gpu->memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, - &gpu->memptrs_iova); + &memptrs_iova); - if (IS_ERR(gpu->memptrs)) { - ret = PTR_ERR(gpu->memptrs); - gpu->memptrs = NULL; + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); - goto fail; + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %lu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); } + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + if (gpu->memptrs_bo) { msm_gem_put_vaddr(gpu->memptrs_bo); msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); @@ -724,16 +782,17 @@ fail: void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; 
} if (gpu->memptrs_bo) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 8ddda059de34..1be0317bb2c0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,8 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -67,14 +68,6 @@ struct msm_gpu_funcs { #endif }; -#define rbmemptr(gpu, member) \ - ((gpu)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) - -struct msm_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct msm_gpu { const char *name; struct drm_device *dev; @@ -93,16 +86,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - /* fencing: */ - struct msm_fence_context *fctx; - /* does gpu need hw_init? */ bool needs_hw_init; @@ -133,21 +122,26 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; - - struct msm_rbmemptrs *memptrs; struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - - }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->fctx->last_fence > gpu->memptrs->fence; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a540130..4db6ca719706 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,15 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + char name[32]; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everwhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +35,44 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, 
gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + + ring->fctx = msm_fence_context_alloc(gpu->dev, name); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + + msm_fence_context_free(ring->fctx); + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo); drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..ec44251ef9f2 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,30 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; uint32_t *start, *end, *cur; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 593c3b5f44cd..5115f75b5b7f 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -60,9 +60,10 @@ void msm_submitqueue_close(struct msm_file_private *ctx) msm_submitqueue_put(entry); } -int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, - u32 *id) +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id) { + struct msm_drm_private *priv = drm->dev_private; struct msm_gpu_submitqueue *queue; if (!ctx) @@ -75,7 +76,13 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, kref_init(&queue->ref); queue->flags = flags; - queue->prio = prio; + + if (priv->gpu) { + if (prio >= priv->gpu->nr_rings) + return -EINVAL; + + queue->prio = prio; + } write_lock(&ctx->queuelock); @@ -91,16 +98,26 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, return 0; } -int msm_submitqueue_init(struct msm_file_private *ctx) +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) { + struct msm_drm_private *priv = drm->dev_private; + int default_prio; + if (!ctx) return 0; + /* + * Select priority 2 as the "default priority" unless nr_rings is less + * than 2 and then pick the lowest pirority + */ + default_prio = priv->gpu ? 
+ clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + INIT_LIST_HEAD(&ctx->submitqueues); rwlock_init(&ctx->queuelock); - return msm_submitqueue_create(ctx, 2, 0, NULL); + return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); } int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) diff --git a/include/dt-bindings/msm/msm-bus-ids.h b/include/dt-bindings/msm/msm-bus-ids.h new file mode 100644 index 000000000000..a75d304473d5 --- /dev/null +++ b/include/dt-bindings/msm/msm-bus-ids.h @@ -0,0 +1,887 @@ +/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MSM_BUS_IDS_H +#define __MSM_BUS_IDS_H + +/* Aggregation types */ +#define AGG_SCHEME_NONE 0 +#define AGG_SCHEME_LEG 1 +#define AGG_SCHEME_1 2 + +/* Topology related enums */ +#define MSM_BUS_FAB_DEFAULT 0 +#define MSM_BUS_FAB_APPSS 0 +#define MSM_BUS_FAB_SYSTEM 1024 +#define MSM_BUS_FAB_MMSS 2048 +#define MSM_BUS_FAB_SYSTEM_FPB 3072 +#define MSM_BUS_FAB_CPSS_FPB 4096 + +#define MSM_BUS_FAB_BIMC 0 +#define MSM_BUS_FAB_SYS_NOC 1024 +#define MSM_BUS_FAB_MMSS_NOC 2048 +#define MSM_BUS_FAB_OCMEM_NOC 3072 +#define MSM_BUS_FAB_PERIPH_NOC 4096 +#define MSM_BUS_FAB_CONFIG_NOC 5120 +#define MSM_BUS_FAB_OCMEM_VNOC 6144 +#define MSM_BUS_FAB_MMSS_AHB 2049 +#define MSM_BUS_FAB_A0_NOC 6145 +#define MSM_BUS_FAB_A1_NOC 6146 +#define MSM_BUS_FAB_A2_NOC 6147 +#define MSM_BUS_FAB_GNOC 6148 +#define MSM_BUS_FAB_CR_VIRT 6149 + +#define MSM_BUS_MASTER_FIRST 1 +#define MSM_BUS_MASTER_AMPSS_M0 1 +#define MSM_BUS_MASTER_AMPSS_M1 2 +#define MSM_BUS_APPSS_MASTER_FAB_MMSS 3 +#define MSM_BUS_APPSS_MASTER_FAB_SYSTEM 4 +#define MSM_BUS_SYSTEM_MASTER_FAB_APPSS 5 +#define MSM_BUS_MASTER_SPS 6 +#define MSM_BUS_MASTER_ADM_PORT0 7 +#define MSM_BUS_MASTER_ADM_PORT1 8 +#define MSM_BUS_SYSTEM_MASTER_ADM1_PORT0 9 +#define MSM_BUS_MASTER_ADM1_PORT1 10 +#define MSM_BUS_MASTER_LPASS_PROC 11 +#define MSM_BUS_MASTER_MSS_PROCI 12 +#define MSM_BUS_MASTER_MSS_PROCD 13 +#define MSM_BUS_MASTER_MSS_MDM_PORT0 14 +#define MSM_BUS_MASTER_LPASS 15 +#define MSM_BUS_SYSTEM_MASTER_CPSS_FPB 16 +#define MSM_BUS_SYSTEM_MASTER_SYSTEM_FPB 17 +#define MSM_BUS_SYSTEM_MASTER_MMSS_FPB 18 +#define MSM_BUS_MASTER_ADM1_CI 19 +#define MSM_BUS_MASTER_ADM0_CI 20 +#define MSM_BUS_MASTER_MSS_MDM_PORT1 21 +#define MSM_BUS_MASTER_MDP_PORT0 22 +#define MSM_BUS_MASTER_MDP_PORT1 23 +#define MSM_BUS_MMSS_MASTER_ADM1_PORT0 24 +#define MSM_BUS_MASTER_ROTATOR 25 +#define MSM_BUS_MASTER_GRAPHICS_3D 26 +#define MSM_BUS_MASTER_JPEG_DEC 27 +#define MSM_BUS_MASTER_GRAPHICS_2D_CORE0 28 +#define MSM_BUS_MASTER_VFE 29 +#define MSM_BUS_MASTER_VFE0 MSM_BUS_MASTER_VFE +#define MSM_BUS_MASTER_VPE 30 +#define MSM_BUS_MASTER_JPEG_ENC 31 +#define MSM_BUS_MASTER_GRAPHICS_2D_CORE1 32 +#define MSM_BUS_MMSS_MASTER_APPS_FAB 33 +#define MSM_BUS_MASTER_HD_CODEC_PORT0 34 +#define MSM_BUS_MASTER_HD_CODEC_PORT1 35 +#define MSM_BUS_MASTER_SPDM 36 +#define MSM_BUS_MASTER_RPM 37 +#define MSM_BUS_MASTER_MSS 38 +#define MSM_BUS_MASTER_RIVA 39 +#define MSM_BUS_MASTER_SNOC_VMEM 40 +#define MSM_BUS_MASTER_MSS_SW_PROC 41 +#define 
MSM_BUS_MASTER_MSS_FW_PROC 42 +#define MSM_BUS_MASTER_HMSS 43 +#define MSM_BUS_MASTER_GSS_NAV 44 +#define MSM_BUS_MASTER_PCIE 45 +#define MSM_BUS_MASTER_SATA 46 +#define MSM_BUS_MASTER_CRYPTO 47 +#define MSM_BUS_MASTER_VIDEO_CAP 48 +#define MSM_BUS_MASTER_GRAPHICS_3D_PORT1 49 +#define MSM_BUS_MASTER_VIDEO_ENC 50 +#define MSM_BUS_MASTER_VIDEO_DEC 51 +#define MSM_BUS_MASTER_LPASS_AHB 52 +#define MSM_BUS_MASTER_QDSS_BAM 53 +#define MSM_BUS_MASTER_SNOC_CFG 54 +#define MSM_BUS_MASTER_CRYPTO_CORE0 55 +#define MSM_BUS_MASTER_CRYPTO_CORE1 56 +#define MSM_BUS_MASTER_MSS_NAV 57 +#define MSM_BUS_MASTER_OCMEM_DMA 58 +#define MSM_BUS_MASTER_WCSS 59 +#define MSM_BUS_MASTER_QDSS_ETR 60 +#define MSM_BUS_MASTER_USB3 61 +#define MSM_BUS_MASTER_JPEG 62 +#define MSM_BUS_MASTER_VIDEO_P0 63 +#define MSM_BUS_MASTER_VIDEO_P1 64 +#define MSM_BUS_MASTER_MSS_PROC 65 +#define MSM_BUS_MASTER_JPEG_OCMEM 66 +#define MSM_BUS_MASTER_MDP_OCMEM 67 +#define MSM_BUS_MASTER_VIDEO_P0_OCMEM 68 +#define MSM_BUS_MASTER_VIDEO_P1_OCMEM 69 +#define MSM_BUS_MASTER_VFE_OCMEM 70 +#define MSM_BUS_MASTER_CNOC_ONOC_CFG 71 +#define MSM_BUS_MASTER_RPM_INST 72 +#define MSM_BUS_MASTER_RPM_DATA 73 +#define MSM_BUS_MASTER_RPM_SYS 74 +#define MSM_BUS_MASTER_DEHR 75 +#define MSM_BUS_MASTER_QDSS_DAP 76 +#define MSM_BUS_MASTER_TIC 77 +#define MSM_BUS_MASTER_SDCC_1 78 +#define MSM_BUS_MASTER_SDCC_3 79 +#define MSM_BUS_MASTER_SDCC_4 80 +#define MSM_BUS_MASTER_SDCC_2 81 +#define MSM_BUS_MASTER_TSIF 82 +#define MSM_BUS_MASTER_BAM_DMA 83 +#define MSM_BUS_MASTER_BLSP_2 84 +#define MSM_BUS_MASTER_USB_HSIC 85 +#define MSM_BUS_MASTER_BLSP_1 86 +#define MSM_BUS_MASTER_USB_HS 87 +#define MSM_BUS_MASTER_PNOC_CFG 88 +#define MSM_BUS_MASTER_V_OCMEM_GFX3D 89 +#define MSM_BUS_MASTER_IPA 90 +#define MSM_BUS_MASTER_QPIC 91 +#define MSM_BUS_MASTER_MDPE 92 +#define MSM_BUS_MASTER_USB_HS2 93 +#define MSM_BUS_MASTER_VPU 94 +#define MSM_BUS_MASTER_UFS 95 +#define MSM_BUS_MASTER_BCAST 96 +#define MSM_BUS_MASTER_CRYPTO_CORE2 97 +#define MSM_BUS_MASTER_EMAC 98 +#define MSM_BUS_MASTER_VPU_1 99 +#define MSM_BUS_MASTER_PCIE_1 100 +#define MSM_BUS_MASTER_USB3_1 101 +#define MSM_BUS_MASTER_CNOC_MNOC_MMSS_CFG 102 +#define MSM_BUS_MASTER_CNOC_MNOC_CFG 103 +#define MSM_BUS_MASTER_TCU_0 104 +#define MSM_BUS_MASTER_TCU_1 105 +#define MSM_BUS_MASTER_CPP 106 +#define MSM_BUS_MASTER_AUDIO 107 +#define MSM_BUS_MASTER_PCIE_2 108 +#define MSM_BUS_MASTER_VFE1 109 +#define MSM_BUS_MASTER_XM_USB_HS1 110 +#define MSM_BUS_MASTER_PCNOC_BIMC_1 111 +#define MSM_BUS_MASTER_BIMC_PCNOC 112 +#define MSM_BUS_MASTER_XI_USB_HSIC 113 +#define MSM_BUS_MASTER_SGMII 114 +#define MSM_BUS_SPMI_FETCHER 115 +#define MSM_BUS_MASTER_GNOC_BIMC 116 +#define MSM_BUS_MASTER_CRVIRT_A2NOC 117 +#define MSM_BUS_MASTER_CNOC_A2NOC 118 +#define MSM_BUS_MASTER_WLAN 119 +#define MSM_BUS_MASTER_MSS_CE 120 +#define MSM_BUS_MASTER_CDSP_PROC 121 +#define MSM_BUS_MASTER_GNOC_SNOC 122 +#define MSM_BUS_MASTER_PIMEM 123 +#define MSM_BUS_MASTER_MASTER_LAST 124 + +#define MSM_BUS_SYSTEM_FPB_MASTER_SYSTEM MSM_BUS_SYSTEM_MASTER_SYSTEM_FPB +#define MSM_BUS_CPSS_FPB_MASTER_SYSTEM MSM_BUS_SYSTEM_MASTER_CPSS_FPB + +#define MSM_BUS_SNOC_MM_INT_0 10000 +#define MSM_BUS_SNOC_MM_INT_1 10001 +#define MSM_BUS_SNOC_MM_INT_2 10002 +#define MSM_BUS_SNOC_MM_INT_BIMC 10003 +#define MSM_BUS_SNOC_INT_0 10004 +#define MSM_BUS_SNOC_INT_1 10005 +#define MSM_BUS_SNOC_INT_BIMC 10006 +#define MSM_BUS_SNOC_BIMC_0_MAS 10007 +#define MSM_BUS_SNOC_BIMC_1_MAS 10008 +#define MSM_BUS_SNOC_QDSS_INT 10009 +#define MSM_BUS_PNOC_SNOC_MAS 10010 +#define 
MSM_BUS_PNOC_SNOC_SLV 10011 +#define MSM_BUS_PNOC_INT_0 10012 +#define MSM_BUS_PNOC_INT_1 10013 +#define MSM_BUS_PNOC_M_0 10014 +#define MSM_BUS_PNOC_M_1 10015 +#define MSM_BUS_BIMC_SNOC_MAS 10016 +#define MSM_BUS_BIMC_SNOC_SLV 10017 +#define MSM_BUS_PNOC_SLV_0 10018 +#define MSM_BUS_PNOC_SLV_1 10019 +#define MSM_BUS_PNOC_SLV_2 10020 +#define MSM_BUS_PNOC_SLV_3 10021 +#define MSM_BUS_PNOC_SLV_4 10022 +#define MSM_BUS_PNOC_SLV_8 10023 +#define MSM_BUS_PNOC_SLV_9 10024 +#define MSM_BUS_SNOC_BIMC_0_SLV 10025 +#define MSM_BUS_SNOC_BIMC_1_SLV 10026 +#define MSM_BUS_MNOC_BIMC_MAS 10027 +#define MSM_BUS_MNOC_BIMC_SLV 10028 +#define MSM_BUS_BIMC_MNOC_MAS 10029 +#define MSM_BUS_BIMC_MNOC_SLV 10030 +#define MSM_BUS_SNOC_BIMC_MAS 10031 +#define MSM_BUS_SNOC_BIMC_SLV 10032 +#define MSM_BUS_CNOC_SNOC_MAS 10033 +#define MSM_BUS_CNOC_SNOC_SLV 10034 +#define MSM_BUS_SNOC_CNOC_MAS 10035 +#define MSM_BUS_SNOC_CNOC_SLV 10036 +#define MSM_BUS_OVNOC_SNOC_MAS 10037 +#define MSM_BUS_OVNOC_SNOC_SLV 10038 +#define MSM_BUS_SNOC_OVNOC_MAS 10039 +#define MSM_BUS_SNOC_OVNOC_SLV 10040 +#define MSM_BUS_SNOC_PNOC_MAS 10041 +#define MSM_BUS_SNOC_PNOC_SLV 10042 +#define MSM_BUS_BIMC_INT_APPS_EBI 10043 +#define MSM_BUS_BIMC_INT_APPS_SNOC 10044 +#define MSM_BUS_SNOC_BIMC_2_MAS 10045 +#define MSM_BUS_SNOC_BIMC_2_SLV 10046 +#define MSM_BUS_PNOC_SLV_5 10047 +#define MSM_BUS_PNOC_SLV_7 10048 +#define MSM_BUS_PNOC_INT_2 10049 +#define MSM_BUS_PNOC_INT_3 10050 +#define MSM_BUS_PNOC_INT_4 10051 +#define MSM_BUS_PNOC_INT_5 10052 +#define MSM_BUS_PNOC_INT_6 10053 +#define MSM_BUS_PNOC_INT_7 10054 +#define MSM_BUS_BIMC_SNOC_1_MAS 10055 +#define MSM_BUS_BIMC_SNOC_1_SLV 10056 +#define MSM_BUS_PNOC_A1NOC_MAS 10057 +#define MSM_BUS_PNOC_A1NOC_SLV 10058 +#define MSM_BUS_CNOC_A1NOC_MAS 10059 +#define MSM_BUS_A0NOC_SNOC_MAS 10060 +#define MSM_BUS_A0NOC_SNOC_SLV 10061 +#define MSM_BUS_A1NOC_SNOC_SLV 10062 +#define MSM_BUS_A1NOC_SNOC_MAS 10063 +#define MSM_BUS_A2NOC_SNOC_MAS 10064 +#define MSM_BUS_A2NOC_SNOC_SLV 10065 +#define MSM_BUS_SNOC_INT_2 10066 +#define MSM_BUS_A0NOC_QDSS_INT 10067 +#define MSM_BUS_INT_LAST 10068 + +#define MSM_BUS_INT_TEST_ID 20000 +#define MSM_BUS_INT_TEST_LAST 20050 + +#define MSM_BUS_SLAVE_FIRST 512 +#define MSM_BUS_SLAVE_EBI_CH0 512 +#define MSM_BUS_SLAVE_EBI_CH1 513 +#define MSM_BUS_SLAVE_AMPSS_L2 514 +#define MSM_BUS_APPSS_SLAVE_FAB_MMSS 515 +#define MSM_BUS_APPSS_SLAVE_FAB_SYSTEM 516 +#define MSM_BUS_SYSTEM_SLAVE_FAB_APPS 517 +#define MSM_BUS_SLAVE_SPS 518 +#define MSM_BUS_SLAVE_SYSTEM_IMEM 519 +#define MSM_BUS_SLAVE_AMPSS 520 +#define MSM_BUS_SLAVE_MSS 521 +#define MSM_BUS_SLAVE_LPASS 522 +#define MSM_BUS_SYSTEM_SLAVE_CPSS_FPB 523 +#define MSM_BUS_SYSTEM_SLAVE_SYSTEM_FPB 524 +#define MSM_BUS_SYSTEM_SLAVE_MMSS_FPB 525 +#define MSM_BUS_SLAVE_CORESIGHT 526 +#define MSM_BUS_SLAVE_RIVA 527 +#define MSM_BUS_SLAVE_SMI 528 +#define MSM_BUS_MMSS_SLAVE_FAB_APPS 529 +#define MSM_BUS_MMSS_SLAVE_FAB_APPS_1 530 +#define MSM_BUS_SLAVE_MM_IMEM 531 +#define MSM_BUS_SLAVE_CRYPTO 532 +#define MSM_BUS_SLAVE_SPDM 533 +#define MSM_BUS_SLAVE_RPM 534 +#define MSM_BUS_SLAVE_RPM_MSG_RAM 535 +#define MSM_BUS_SLAVE_MPM 536 +#define MSM_BUS_SLAVE_PMIC1_SSBI1_A 537 +#define MSM_BUS_SLAVE_PMIC1_SSBI1_B 538 +#define MSM_BUS_SLAVE_PMIC1_SSBI1_C 539 +#define MSM_BUS_SLAVE_PMIC2_SSBI2_A 540 +#define MSM_BUS_SLAVE_PMIC2_SSBI2_B 541 +#define MSM_BUS_SLAVE_GSBI1_UART 542 +#define MSM_BUS_SLAVE_GSBI2_UART 543 +#define MSM_BUS_SLAVE_GSBI3_UART 544 +#define MSM_BUS_SLAVE_GSBI4_UART 545 +#define MSM_BUS_SLAVE_GSBI5_UART 546 +#define 
MSM_BUS_SLAVE_GSBI6_UART 547 +#define MSM_BUS_SLAVE_GSBI7_UART 548 +#define MSM_BUS_SLAVE_GSBI8_UART 549 +#define MSM_BUS_SLAVE_GSBI9_UART 550 +#define MSM_BUS_SLAVE_GSBI10_UART 551 +#define MSM_BUS_SLAVE_GSBI11_UART 552 +#define MSM_BUS_SLAVE_GSBI12_UART 553 +#define MSM_BUS_SLAVE_GSBI1_QUP 554 +#define MSM_BUS_SLAVE_GSBI2_QUP 555 +#define MSM_BUS_SLAVE_GSBI3_QUP 556 +#define MSM_BUS_SLAVE_GSBI4_QUP 557 +#define MSM_BUS_SLAVE_GSBI5_QUP 558 +#define MSM_BUS_SLAVE_GSBI6_QUP 559 +#define MSM_BUS_SLAVE_GSBI7_QUP 560 +#define MSM_BUS_SLAVE_GSBI8_QUP 561 +#define MSM_BUS_SLAVE_GSBI9_QUP 562 +#define MSM_BUS_SLAVE_GSBI10_QUP 563 +#define MSM_BUS_SLAVE_GSBI11_QUP 564 +#define MSM_BUS_SLAVE_GSBI12_QUP 565 +#define MSM_BUS_SLAVE_EBI2_NAND 566 +#define MSM_BUS_SLAVE_EBI2_CS0 567 +#define MSM_BUS_SLAVE_EBI2_CS1 568 +#define MSM_BUS_SLAVE_EBI2_CS2 569 +#define MSM_BUS_SLAVE_EBI2_CS3 570 +#define MSM_BUS_SLAVE_EBI2_CS4 571 +#define MSM_BUS_SLAVE_EBI2_CS5 572 +#define MSM_BUS_SLAVE_USB_FS1 573 +#define MSM_BUS_SLAVE_USB_FS2 574 +#define MSM_BUS_SLAVE_TSIF 575 +#define MSM_BUS_SLAVE_MSM_TSSC 576 +#define MSM_BUS_SLAVE_MSM_PDM 577 +#define MSM_BUS_SLAVE_MSM_DIMEM 578 +#define MSM_BUS_SLAVE_MSM_TCSR 579 +#define MSM_BUS_SLAVE_MSM_PRNG 580 +#define MSM_BUS_SLAVE_GSS 581 +#define MSM_BUS_SLAVE_SATA 582 +#define MSM_BUS_SLAVE_USB3 583 +#define MSM_BUS_SLAVE_WCSS 584 +#define MSM_BUS_SLAVE_OCIMEM 585 +#define MSM_BUS_SLAVE_SNOC_OCMEM 586 +#define MSM_BUS_SLAVE_SERVICE_SNOC 587 +#define MSM_BUS_SLAVE_QDSS_STM 588 +#define MSM_BUS_SLAVE_CAMERA_CFG 589 +#define MSM_BUS_SLAVE_DISPLAY_CFG 590 +#define MSM_BUS_SLAVE_OCMEM_CFG 591 +#define MSM_BUS_SLAVE_CPR_CFG 592 +#define MSM_BUS_SLAVE_CPR_XPU_CFG 593 +#define MSM_BUS_SLAVE_MISC_CFG 594 +#define MSM_BUS_SLAVE_MISC_XPU_CFG 595 +#define MSM_BUS_SLAVE_VENUS_CFG 596 +#define MSM_BUS_SLAVE_MISC_VENUS_CFG 597 +#define MSM_BUS_SLAVE_GRAPHICS_3D_CFG 598 +#define MSM_BUS_SLAVE_MMSS_CLK_CFG 599 +#define MSM_BUS_SLAVE_MMSS_CLK_XPU_CFG 600 +#define MSM_BUS_SLAVE_MNOC_MPU_CFG 601 +#define MSM_BUS_SLAVE_ONOC_MPU_CFG 602 +#define MSM_BUS_SLAVE_SERVICE_MNOC 603 +#define MSM_BUS_SLAVE_OCMEM 604 +#define MSM_BUS_SLAVE_SERVICE_ONOC 605 +#define MSM_BUS_SLAVE_SDCC_1 606 +#define MSM_BUS_SLAVE_SDCC_3 607 +#define MSM_BUS_SLAVE_SDCC_2 608 +#define MSM_BUS_SLAVE_SDCC_4 609 +#define MSM_BUS_SLAVE_BAM_DMA 610 +#define MSM_BUS_SLAVE_BLSP_2 611 +#define MSM_BUS_SLAVE_USB_HSIC 612 +#define MSM_BUS_SLAVE_BLSP_1 613 +#define MSM_BUS_SLAVE_USB_HS 614 +#define MSM_BUS_SLAVE_PDM 615 +#define MSM_BUS_SLAVE_PERIPH_APU_CFG 616 +#define MSM_BUS_SLAVE_PNOC_MPU_CFG 617 +#define MSM_BUS_SLAVE_PRNG 618 +#define MSM_BUS_SLAVE_SERVICE_PNOC 619 +#define MSM_BUS_SLAVE_CLK_CTL 620 +#define MSM_BUS_SLAVE_CNOC_MSS 621 +#define MSM_BUS_SLAVE_SECURITY 622 +#define MSM_BUS_SLAVE_TCSR 623 +#define MSM_BUS_SLAVE_TLMM 624 +#define MSM_BUS_SLAVE_CRYPTO_0_CFG 625 +#define MSM_BUS_SLAVE_CRYPTO_1_CFG 626 +#define MSM_BUS_SLAVE_IMEM_CFG 627 +#define MSM_BUS_SLAVE_MESSAGE_RAM 628 +#define MSM_BUS_SLAVE_BIMC_CFG 629 +#define MSM_BUS_SLAVE_BOOT_ROM 630 +#define MSM_BUS_SLAVE_CNOC_MNOC_MMSS_CFG 631 +#define MSM_BUS_SLAVE_PMIC_ARB 632 +#define MSM_BUS_SLAVE_SPDM_WRAPPER 633 +#define MSM_BUS_SLAVE_DEHR_CFG 634 +#define MSM_BUS_SLAVE_QDSS_CFG 635 +#define MSM_BUS_SLAVE_RBCPR_CFG 636 +#define MSM_BUS_SLAVE_RBCPR_QDSS_APU_CFG 637 +#define MSM_BUS_SLAVE_SNOC_MPU_CFG 638 +#define MSM_BUS_SLAVE_CNOC_ONOC_CFG 639 +#define MSM_BUS_SLAVE_CNOC_MNOC_CFG 640 +#define MSM_BUS_SLAVE_PNOC_CFG 641 +#define MSM_BUS_SLAVE_SNOC_CFG 642 +#define 
MSM_BUS_SLAVE_EBI1_DLL_CFG 643 +#define MSM_BUS_SLAVE_PHY_APU_CFG 644 +#define MSM_BUS_SLAVE_EBI1_PHY_CFG 645 +#define MSM_BUS_SLAVE_SERVICE_CNOC 646 +#define MSM_BUS_SLAVE_IPS_CFG 647 +#define MSM_BUS_SLAVE_QPIC 648 +#define MSM_BUS_SLAVE_DSI_CFG 649 +#define MSM_BUS_SLAVE_UFS_CFG 650 +#define MSM_BUS_SLAVE_RBCPR_CX_CFG 651 +#define MSM_BUS_SLAVE_RBCPR_MX_CFG 652 +#define MSM_BUS_SLAVE_PCIE_CFG 653 +#define MSM_BUS_SLAVE_USB_PHYS_CFG 654 +#define MSM_BUS_SLAVE_VIDEO_CAP_CFG 655 +#define MSM_BUS_SLAVE_AVSYNC_CFG 656 +#define MSM_BUS_SLAVE_CRYPTO_2_CFG 657 +#define MSM_BUS_SLAVE_VPU_CFG 658 +#define MSM_BUS_SLAVE_BCAST_CFG 659 +#define MSM_BUS_SLAVE_KLM_CFG 660 +#define MSM_BUS_SLAVE_GENI_IR_CFG 661 +#define MSM_BUS_SLAVE_OCMEM_GFX 662 +#define MSM_BUS_SLAVE_CATS_128 663 +#define MSM_BUS_SLAVE_OCMEM_64 664 +#define MSM_BUS_SLAVE_PCIE_0 665 +#define MSM_BUS_SLAVE_PCIE_1 666 +#define MSM_BUS_SLAVE_PCIE_0_CFG 667 +#define MSM_BUS_SLAVE_PCIE_1_CFG 668 +#define MSM_BUS_SLAVE_SRVC_MNOC 669 +#define MSM_BUS_SLAVE_USB_HS2 670 +#define MSM_BUS_SLAVE_AUDIO 671 +#define MSM_BUS_SLAVE_TCU 672 +#define MSM_BUS_SLAVE_APPSS 673 +#define MSM_BUS_SLAVE_PCIE_PARF 674 +#define MSM_BUS_SLAVE_USB3_PHY_CFG 675 +#define MSM_BUS_SLAVE_IPA_CFG 676 +#define MSM_BUS_SLAVE_A0NOC_SNOC 677 +#define MSM_BUS_SLAVE_A1NOC_SNOC 678 +#define MSM_BUS_SLAVE_A2NOC_SNOC 679 +#define MSM_BUS_SLAVE_HMSS_L3 680 +#define MSM_BUS_SLAVE_PIMEM_CFG 681 +#define MSM_BUS_SLAVE_DCC_CFG 682 +#define MSM_BUS_SLAVE_QDSS_RBCPR_APU_CFG 683 +#define MSM_BUS_SLAVE_PCIE_2_CFG 684 +#define MSM_BUS_SLAVE_PCIE20_AHB2PHY 685 +#define MSM_BUS_SLAVE_A0NOC_CFG 686 +#define MSM_BUS_SLAVE_A1NOC_CFG 687 +#define MSM_BUS_SLAVE_A2NOC_CFG 688 +#define MSM_BUS_SLAVE_A1NOC_MPU_CFG 689 +#define MSM_BUS_SLAVE_A2NOC_MPU_CFG 690 +#define MSM_BUS_SLAVE_A0NOC_SMMU_CFG 691 +#define MSM_BUS_SLAVE_A1NOC_SMMU_CFG 692 +#define MSM_BUS_SLAVE_A2NOC_SMMU_CFG 693 +#define MSM_BUS_SLAVE_LPASS_SMMU_CFG 694 +#define MSM_BUS_SLAVE_MMAGIC_CFG 695 +#define MSM_BUS_SLAVE_VENUS_THROTTLE_CFG 696 +#define MSM_BUS_SLAVE_SSC_CFG 697 +#define MSM_BUS_SLAVE_DSA_CFG 698 +#define MSM_BUS_SLAVE_DSA_MPU_CFG 699 +#define MSM_BUS_SLAVE_DISPLAY_THROTTLE_CFG 700 +#define MSM_BUS_SLAVE_SMMU_CPP_CFG 701 +#define MSM_BUS_SLAVE_SMMU_JPEG_CFG 702 +#define MSM_BUS_SLAVE_SMMU_MDP_CFG 703 +#define MSM_BUS_SLAVE_SMMU_ROTATOR_CFG 704 +#define MSM_BUS_SLAVE_SMMU_VENUS_CFG 705 +#define MSM_BUS_SLAVE_SMMU_VFE_CFG 706 +#define MSM_BUS_SLAVE_A0NOC_MPU_CFG 707 +#define MSM_BUS_SLAVE_VMEM_CFG 708 +#define MSM_BUS_SLAVE_CAMERA_THROTTLE_CFG 709 +#define MSM_BUS_SLAVE_VMEM 710 +#define MSM_BUS_SLAVE_AHB2PHY 711 +#define MSM_BUS_SLAVE_PIMEM 712 +#define MSM_BUS_SLAVE_SNOC_VMEM 713 +#define MSM_BUS_SLAVE_PCIE_2 714 +#define MSM_BUS_SLAVE_RBCPR_MX 715 +#define MSM_BUS_SLAVE_RBCPR_CX 716 +#define MSM_BUS_SLAVE_BIMC_PCNOC 717 +#define MSM_BUS_SLAVE_PCNOC_BIMC_1 718 +#define MSM_BUS_SLAVE_SGMII 719 +#define MSM_BUS_SLAVE_SPMI_FETCHER 720 +#define MSM_BUS_PNOC_SLV_6 721 +#define MSM_BUS_SLAVE_MMSS_SMMU_CFG 722 +#define MSM_BUS_SLAVE_WLAN 723 +#define MSM_BUS_SLAVE_CRVIRT_A2NOC 724 +#define MSM_BUS_SLAVE_CNOC_A2NOC 725 +#define MSM_BUS_SLAVE_GLM 726 +#define MSM_BUS_SLAVE_GNOC_BIMC 727 +#define MSM_BUS_SLAVE_GNOC_SNOC 728 +#define MSM_BUS_SLAVE_QM_CFG 729 +#define MSM_BUS_SLAVE_TLMM_EAST 730 +#define MSM_BUS_SLAVE_TLMM_NORTH 731 +#define MSM_BUS_SLAVE_TLMM_WEST 732 +#define MSM_BUS_SLAVE_SKL 733 +#define MSM_BUS_SLAVE_LPASS_TCM 734 +#define MSM_BUS_SLAVE_TLMM_SOUTH 735 +#define MSM_BUS_SLAVE_TLMM_CENTER 736 +#define 
MSM_BUS_MSS_NAV_CE_MPU_CFG 737 +#define MSM_BUS_SLAVE_A2NOC_THROTTLE_CFG 738 +#define MSM_BUS_SLAVE_CDSP 739 +#define MSM_BUS_SLAVE_CDSP_SMMU_CFG 740 +#define MSM_BUS_SLAVE_LPASS_MPU_CFG 741 +#define MSM_BUS_SLAVE_CSI_PHY_CFG 742 +#define MSM_BUS_SLAVE_LAST 743 + +#define MSM_BUS_SYSTEM_FPB_SLAVE_SYSTEM MSM_BUS_SYSTEM_SLAVE_SYSTEM_FPB +#define MSM_BUS_CPSS_FPB_SLAVE_SYSTEM MSM_BUS_SYSTEM_SLAVE_CPSS_FPB + +/* + * ID's used in RPM messages + */ +#define ICBID_MASTER_APPSS_PROC 0 +#define ICBID_MASTER_MSS_PROC 1 +#define ICBID_MASTER_MNOC_BIMC 2 +#define ICBID_MASTER_SNOC_BIMC 3 +#define ICBID_MASTER_SNOC_BIMC_0 ICBID_MASTER_SNOC_BIMC +#define ICBID_MASTER_CNOC_MNOC_MMSS_CFG 4 +#define ICBID_MASTER_CNOC_MNOC_CFG 5 +#define ICBID_MASTER_GFX3D 6 +#define ICBID_MASTER_JPEG 7 +#define ICBID_MASTER_MDP 8 +#define ICBID_MASTER_MDP0 ICBID_MASTER_MDP +#define ICBID_MASTER_MDPS ICBID_MASTER_MDP +#define ICBID_MASTER_VIDEO 9 +#define ICBID_MASTER_VIDEO_P0 ICBID_MASTER_VIDEO +#define ICBID_MASTER_VIDEO_P1 10 +#define ICBID_MASTER_VFE 11 +#define ICBID_MASTER_VFE0 ICBID_MASTER_VFE +#define ICBID_MASTER_CNOC_ONOC_CFG 12 +#define ICBID_MASTER_JPEG_OCMEM 13 +#define ICBID_MASTER_MDP_OCMEM 14 +#define ICBID_MASTER_VIDEO_P0_OCMEM 15 +#define ICBID_MASTER_VIDEO_P1_OCMEM 16 +#define ICBID_MASTER_VFE_OCMEM 17 +#define ICBID_MASTER_LPASS_AHB 18 +#define ICBID_MASTER_QDSS_BAM 19 +#define ICBID_MASTER_SNOC_CFG 20 +#define ICBID_MASTER_BIMC_SNOC 21 +#define ICBID_MASTER_BIMC_SNOC_0 ICBID_MASTER_BIMC_SNOC +#define ICBID_MASTER_CNOC_SNOC 22 +#define ICBID_MASTER_CRYPTO 23 +#define ICBID_MASTER_CRYPTO_CORE0 ICBID_MASTER_CRYPTO +#define ICBID_MASTER_CRYPTO_CORE1 24 +#define ICBID_MASTER_LPASS_PROC 25 +#define ICBID_MASTER_MSS 26 +#define ICBID_MASTER_MSS_NAV 27 +#define ICBID_MASTER_OCMEM_DMA 28 +#define ICBID_MASTER_PNOC_SNOC 29 +#define ICBID_MASTER_WCSS 30 +#define ICBID_MASTER_QDSS_ETR 31 +#define ICBID_MASTER_USB3 32 +#define ICBID_MASTER_USB3_0 ICBID_MASTER_USB3 +#define ICBID_MASTER_SDCC_1 33 +#define ICBID_MASTER_SDCC_3 34 +#define ICBID_MASTER_SDCC_2 35 +#define ICBID_MASTER_SDCC_4 36 +#define ICBID_MASTER_TSIF 37 +#define ICBID_MASTER_BAM_DMA 38 +#define ICBID_MASTER_BLSP_2 39 +#define ICBID_MASTER_USB_HSIC 40 +#define ICBID_MASTER_BLSP_1 41 +#define ICBID_MASTER_USB_HS 42 +#define ICBID_MASTER_USB_HS1 ICBID_MASTER_USB_HS +#define ICBID_MASTER_PNOC_CFG 43 +#define ICBID_MASTER_SNOC_PNOC 44 +#define ICBID_MASTER_RPM_INST 45 +#define ICBID_MASTER_RPM_DATA 46 +#define ICBID_MASTER_RPM_SYS 47 +#define ICBID_MASTER_DEHR 48 +#define ICBID_MASTER_QDSS_DAP 49 +#define ICBID_MASTER_SPDM 50 +#define ICBID_MASTER_TIC 51 +#define ICBID_MASTER_SNOC_CNOC 52 +#define ICBID_MASTER_GFX3D_OCMEM 53 +#define ICBID_MASTER_GFX3D_GMEM ICBID_MASTER_GFX3D_OCMEM +#define ICBID_MASTER_OVIRT_SNOC 54 +#define ICBID_MASTER_SNOC_OVIRT 55 +#define ICBID_MASTER_SNOC_GVIRT ICBID_MASTER_SNOC_OVIRT +#define ICBID_MASTER_ONOC_OVIRT 56 +#define ICBID_MASTER_USB_HS2 57 +#define ICBID_MASTER_QPIC 58 +#define ICBID_MASTER_IPA 59 +#define ICBID_MASTER_DSI 60 +#define ICBID_MASTER_MDP1 61 +#define ICBID_MASTER_MDPE ICBID_MASTER_MDP1 +#define ICBID_MASTER_VPU_PROC 62 +#define ICBID_MASTER_VPU 63 +#define ICBID_MASTER_VPU0 ICBID_MASTER_VPU +#define ICBID_MASTER_CRYPTO_CORE2 64 +#define ICBID_MASTER_PCIE_0 65 +#define ICBID_MASTER_PCIE_1 66 +#define ICBID_MASTER_SATA 67 +#define ICBID_MASTER_UFS 68 +#define ICBID_MASTER_USB3_1 69 +#define ICBID_MASTER_VIDEO_OCMEM 70 +#define ICBID_MASTER_VPU1 71 +#define ICBID_MASTER_VCAP 72 +#define ICBID_MASTER_EMAC 73 
+#define ICBID_MASTER_BCAST 74 +#define ICBID_MASTER_MMSS_PROC 75 +#define ICBID_MASTER_SNOC_BIMC_1 76 +#define ICBID_MASTER_SNOC_PCNOC 77 +#define ICBID_MASTER_AUDIO 78 +#define ICBID_MASTER_MM_INT_0 79 +#define ICBID_MASTER_MM_INT_1 80 +#define ICBID_MASTER_MM_INT_2 81 +#define ICBID_MASTER_MM_INT_BIMC 82 +#define ICBID_MASTER_MSS_INT 83 +#define ICBID_MASTER_PCNOC_CFG 84 +#define ICBID_MASTER_PCNOC_INT_0 85 +#define ICBID_MASTER_PCNOC_INT_1 86 +#define ICBID_MASTER_PCNOC_M_0 87 +#define ICBID_MASTER_PCNOC_M_1 88 +#define ICBID_MASTER_PCNOC_S_0 89 +#define ICBID_MASTER_PCNOC_S_1 90 +#define ICBID_MASTER_PCNOC_S_2 91 +#define ICBID_MASTER_PCNOC_S_3 92 +#define ICBID_MASTER_PCNOC_S_4 93 +#define ICBID_MASTER_PCNOC_S_6 94 +#define ICBID_MASTER_PCNOC_S_7 95 +#define ICBID_MASTER_PCNOC_S_8 96 +#define ICBID_MASTER_PCNOC_S_9 97 +#define ICBID_MASTER_QDSS_INT 98 +#define ICBID_MASTER_SNOC_INT_0 99 +#define ICBID_MASTER_SNOC_INT_1 100 +#define ICBID_MASTER_SNOC_INT_BIMC 101 +#define ICBID_MASTER_TCU_0 102 +#define ICBID_MASTER_TCU_1 103 +#define ICBID_MASTER_BIMC_INT_0 104 +#define ICBID_MASTER_BIMC_INT_1 105 +#define ICBID_MASTER_CAMERA 106 +#define ICBID_MASTER_RICA 107 +#define ICBID_MASTER_SNOC_BIMC_2 108 +#define ICBID_MASTER_BIMC_SNOC_1 109 +#define ICBID_MASTER_A0NOC_SNOC 110 +#define ICBID_MASTER_A1NOC_SNOC 111 +#define ICBID_MASTER_A2NOC_SNOC 112 +#define ICBID_MASTER_PIMEM 113 +#define ICBID_MASTER_SNOC_VMEM 114 +#define ICBID_MASTER_CPP 115 +#define ICBID_MASTER_CNOC_A1NOC 116 +#define ICBID_MASTER_PNOC_A1NOC 117 +#define ICBID_MASTER_HMSS 118 +#define ICBID_MASTER_PCIE_2 119 +#define ICBID_MASTER_ROTATOR 120 +#define ICBID_MASTER_VENUS_VMEM 121 +#define ICBID_MASTER_DCC 122 +#define ICBID_MASTER_MCDMA 123 +#define ICBID_MASTER_PCNOC_INT_2 124 +#define ICBID_MASTER_PCNOC_INT_3 125 +#define ICBID_MASTER_PCNOC_INT_4 126 +#define ICBID_MASTER_PCNOC_INT_5 127 +#define ICBID_MASTER_PCNOC_INT_6 128 +#define ICBID_MASTER_PCNOC_S_5 129 +#define ICBID_MASTER_SENSORS_AHB 130 +#define ICBID_MASTER_SENSORS_PROC 131 +#define ICBID_MASTER_QSPI 132 +#define ICBID_MASTER_VFE1 133 +#define ICBID_MASTER_SNOC_INT_2 134 +#define ICBID_MASTER_SMMNOC_BIMC 135 +#define ICBID_MASTER_CRVIRT_A1NOC 136 +#define ICBID_MASTER_XM_USB_HS1 137 +#define ICBID_MASTER_XI_USB_HS1 138 +#define ICBID_MASTER_PCNOC_BIMC_1 139 +#define ICBID_MASTER_BIMC_PCNOC 140 +#define ICBID_MASTER_XI_HSIC 141 +#define ICBID_MASTER_SGMII 142 +#define ICBID_MASTER_SPMI_FETCHER 143 +#define ICBID_MASTER_GNOC_BIMC 144 +#define ICBID_MASTER_CRVIRT_A2NOC 145 +#define ICBID_MASTER_CNOC_A2NOC 146 +#define ICBID_MASTER_WLAN 147 +#define ICBID_MASTER_MSS_CE 148 +#define ICBID_MASTER_CDSP_PROC 149 +#define ICBID_MASTER_GNOC_SNOC 150 + +#define ICBID_SLAVE_EBI1 0 +#define ICBID_SLAVE_APPSS_L2 1 +#define ICBID_SLAVE_BIMC_SNOC 2 +#define ICBID_SLAVE_BIMC_SNOC_0 ICBID_SLAVE_BIMC_SNOC +#define ICBID_SLAVE_CAMERA_CFG 3 +#define ICBID_SLAVE_DISPLAY_CFG 4 +#define ICBID_SLAVE_OCMEM_CFG 5 +#define ICBID_SLAVE_CPR_CFG 6 +#define ICBID_SLAVE_CPR_XPU_CFG 7 +#define ICBID_SLAVE_MISC_CFG 8 +#define ICBID_SLAVE_MISC_XPU_CFG 9 +#define ICBID_SLAVE_VENUS_CFG 10 +#define ICBID_SLAVE_GFX3D_CFG 11 +#define ICBID_SLAVE_MMSS_CLK_CFG 12 +#define ICBID_SLAVE_MMSS_CLK_XPU_CFG 13 +#define ICBID_SLAVE_MNOC_MPU_CFG 14 +#define ICBID_SLAVE_ONOC_MPU_CFG 15 +#define ICBID_SLAVE_MNOC_BIMC 16 +#define ICBID_SLAVE_SERVICE_MNOC 17 +#define ICBID_SLAVE_OCMEM 18 +#define ICBID_SLAVE_GMEM ICBID_SLAVE_OCMEM +#define ICBID_SLAVE_SERVICE_ONOC 19 +#define ICBID_SLAVE_APPSS 20 +#define 
ICBID_SLAVE_LPASS 21 +#define ICBID_SLAVE_USB3 22 +#define ICBID_SLAVE_USB3_0 ICBID_SLAVE_USB3 +#define ICBID_SLAVE_WCSS 23 +#define ICBID_SLAVE_SNOC_BIMC 24 +#define ICBID_SLAVE_SNOC_BIMC_0 ICBID_SLAVE_SNOC_BIMC +#define ICBID_SLAVE_SNOC_CNOC 25 +#define ICBID_SLAVE_IMEM 26 +#define ICBID_SLAVE_OCIMEM ICBID_SLAVE_IMEM +#define ICBID_SLAVE_SNOC_OVIRT 27 +#define ICBID_SLAVE_SNOC_GVIRT ICBID_SLAVE_SNOC_OVIRT +#define ICBID_SLAVE_SNOC_PNOC 28 +#define ICBID_SLAVE_SNOC_PCNOC ICBID_SLAVE_SNOC_PNOC +#define ICBID_SLAVE_SERVICE_SNOC 29 +#define ICBID_SLAVE_QDSS_STM 30 +#define ICBID_SLAVE_SDCC_1 31 +#define ICBID_SLAVE_SDCC_3 32 +#define ICBID_SLAVE_SDCC_2 33 +#define ICBID_SLAVE_SDCC_4 34 +#define ICBID_SLAVE_TSIF 35 +#define ICBID_SLAVE_BAM_DMA 36 +#define ICBID_SLAVE_BLSP_2 37 +#define ICBID_SLAVE_USB_HSIC 38 +#define ICBID_SLAVE_BLSP_1 39 +#define ICBID_SLAVE_USB_HS 40 +#define ICBID_SLAVE_USB_HS1 ICBID_SLAVE_USB_HS +#define ICBID_SLAVE_PDM 41 +#define ICBID_SLAVE_PERIPH_APU_CFG 42 +#define ICBID_SLAVE_PNOC_MPU_CFG 43 +#define ICBID_SLAVE_PRNG 44 +#define ICBID_SLAVE_PNOC_SNOC 45 +#define ICBID_SLAVE_PCNOC_SNOC ICBID_SLAVE_PNOC_SNOC +#define ICBID_SLAVE_SERVICE_PNOC 46 +#define ICBID_SLAVE_CLK_CTL 47 +#define ICBID_SLAVE_CNOC_MSS 48 +#define ICBID_SLAVE_PCNOC_MSS ICBID_SLAVE_CNOC_MSS +#define ICBID_SLAVE_SECURITY 49 +#define ICBID_SLAVE_TCSR 50 +#define ICBID_SLAVE_TLMM 51 +#define ICBID_SLAVE_CRYPTO_0_CFG 52 +#define ICBID_SLAVE_CRYPTO_1_CFG 53 +#define ICBID_SLAVE_IMEM_CFG 54 +#define ICBID_SLAVE_MESSAGE_RAM 55 +#define ICBID_SLAVE_BIMC_CFG 56 +#define ICBID_SLAVE_BOOT_ROM 57 +#define ICBID_SLAVE_CNOC_MNOC_MMSS_CFG 58 +#define ICBID_SLAVE_PMIC_ARB 59 +#define ICBID_SLAVE_SPDM_WRAPPER 60 +#define ICBID_SLAVE_DEHR_CFG 61 +#define ICBID_SLAVE_MPM 62 +#define ICBID_SLAVE_QDSS_CFG 63 +#define ICBID_SLAVE_RBCPR_CFG 64 +#define ICBID_SLAVE_RBCPR_CX_CFG ICBID_SLAVE_RBCPR_CFG +#define ICBID_SLAVE_RBCPR_QDSS_APU_CFG 65 +#define ICBID_SLAVE_CNOC_MNOC_CFG 66 +#define ICBID_SLAVE_SNOC_MPU_CFG 67 +#define ICBID_SLAVE_CNOC_ONOC_CFG 68 +#define ICBID_SLAVE_PNOC_CFG 69 +#define ICBID_SLAVE_SNOC_CFG 70 +#define ICBID_SLAVE_EBI1_DLL_CFG 71 +#define ICBID_SLAVE_PHY_APU_CFG 72 +#define ICBID_SLAVE_EBI1_PHY_CFG 73 +#define ICBID_SLAVE_RPM 74 +#define ICBID_SLAVE_CNOC_SNOC 75 +#define ICBID_SLAVE_SERVICE_CNOC 76 +#define ICBID_SLAVE_OVIRT_SNOC 77 +#define ICBID_SLAVE_OVIRT_OCMEM 78 +#define ICBID_SLAVE_USB_HS2 79 +#define ICBID_SLAVE_QPIC 80 +#define ICBID_SLAVE_IPS_CFG 81 +#define ICBID_SLAVE_DSI_CFG 82 +#define ICBID_SLAVE_USB3_1 83 +#define ICBID_SLAVE_PCIE_0 84 +#define ICBID_SLAVE_PCIE_1 85 +#define ICBID_SLAVE_PSS_SMMU_CFG 86 +#define ICBID_SLAVE_CRYPTO_2_CFG 87 +#define ICBID_SLAVE_PCIE_0_CFG 88 +#define ICBID_SLAVE_PCIE_1_CFG 89 +#define ICBID_SLAVE_SATA_CFG 90 +#define ICBID_SLAVE_SPSS_GENI_IR 91 +#define ICBID_SLAVE_UFS_CFG 92 +#define ICBID_SLAVE_AVSYNC_CFG 93 +#define ICBID_SLAVE_VPU_CFG 94 +#define ICBID_SLAVE_USB_PHY_CFG 95 +#define ICBID_SLAVE_RBCPR_MX_CFG 96 +#define ICBID_SLAVE_PCIE_PARF 97 +#define ICBID_SLAVE_VCAP_CFG 98 +#define ICBID_SLAVE_EMAC_CFG 99 +#define ICBID_SLAVE_BCAST_CFG 100 +#define ICBID_SLAVE_KLM_CFG 101 +#define ICBID_SLAVE_DISPLAY_PWM 102 +#define ICBID_SLAVE_GENI 103 +#define ICBID_SLAVE_SNOC_BIMC_1 104 +#define ICBID_SLAVE_AUDIO 105 +#define ICBID_SLAVE_CATS_0 106 +#define ICBID_SLAVE_CATS_1 107 +#define ICBID_SLAVE_MM_INT_0 108 +#define ICBID_SLAVE_MM_INT_1 109 +#define ICBID_SLAVE_MM_INT_2 110 +#define ICBID_SLAVE_MM_INT_BIMC 111 +#define ICBID_SLAVE_MMU_MODEM_XPU_CFG 112 
+#define ICBID_SLAVE_MSS_INT 113 +#define ICBID_SLAVE_PCNOC_INT_0 114 +#define ICBID_SLAVE_PCNOC_INT_1 115 +#define ICBID_SLAVE_PCNOC_M_0 116 +#define ICBID_SLAVE_PCNOC_M_1 117 +#define ICBID_SLAVE_PCNOC_S_0 118 +#define ICBID_SLAVE_PCNOC_S_1 119 +#define ICBID_SLAVE_PCNOC_S_2 120 +#define ICBID_SLAVE_PCNOC_S_3 121 +#define ICBID_SLAVE_PCNOC_S_4 122 +#define ICBID_SLAVE_PCNOC_S_6 123 +#define ICBID_SLAVE_PCNOC_S_7 124 +#define ICBID_SLAVE_PCNOC_S_8 125 +#define ICBID_SLAVE_PCNOC_S_9 126 +#define ICBID_SLAVE_PRNG_XPU_CFG 127 +#define ICBID_SLAVE_QDSS_INT 128 +#define ICBID_SLAVE_RPM_XPU_CFG 129 +#define ICBID_SLAVE_SNOC_INT_0 130 +#define ICBID_SLAVE_SNOC_INT_1 131 +#define ICBID_SLAVE_SNOC_INT_BIMC 132 +#define ICBID_SLAVE_TCU 133 +#define ICBID_SLAVE_BIMC_INT_0 134 +#define ICBID_SLAVE_BIMC_INT_1 135 +#define ICBID_SLAVE_RICA_CFG 136 +#define ICBID_SLAVE_SNOC_BIMC_2 137 +#define ICBID_SLAVE_BIMC_SNOC_1 138 +#define ICBID_SLAVE_PNOC_A1NOC 139 +#define ICBID_SLAVE_SNOC_VMEM 140 +#define ICBID_SLAVE_A0NOC_SNOC 141 +#define ICBID_SLAVE_A1NOC_SNOC 142 +#define ICBID_SLAVE_A2NOC_SNOC 143 +#define ICBID_SLAVE_A0NOC_CFG 144 +#define ICBID_SLAVE_A0NOC_MPU_CFG 145 +#define ICBID_SLAVE_A0NOC_SMMU_CFG 146 +#define ICBID_SLAVE_A1NOC_CFG 147 +#define ICBID_SLAVE_A1NOC_MPU_CFG 148 +#define ICBID_SLAVE_A1NOC_SMMU_CFG 149 +#define ICBID_SLAVE_A2NOC_CFG 150 +#define ICBID_SLAVE_A2NOC_MPU_CFG 151 +#define ICBID_SLAVE_A2NOC_SMMU_CFG 152 +#define ICBID_SLAVE_AHB2PHY 153 +#define ICBID_SLAVE_CAMERA_THROTTLE_CFG 154 +#define ICBID_SLAVE_DCC_CFG 155 +#define ICBID_SLAVE_DISPLAY_THROTTLE_CFG 156 +#define ICBID_SLAVE_DSA_CFG 157 +#define ICBID_SLAVE_DSA_MPU_CFG 158 +#define ICBID_SLAVE_SSC_MPU_CFG 159 +#define ICBID_SLAVE_HMSS_L3 160 +#define ICBID_SLAVE_LPASS_SMMU_CFG 161 +#define ICBID_SLAVE_MMAGIC_CFG 162 +#define ICBID_SLAVE_PCIE20_AHB2PHY 163 +#define ICBID_SLAVE_PCIE_2 164 +#define ICBID_SLAVE_PCIE_2_CFG 165 +#define ICBID_SLAVE_PIMEM 166 +#define ICBID_SLAVE_PIMEM_CFG 167 +#define ICBID_SLAVE_QDSS_RBCPR_APU_CFG 168 +#define ICBID_SLAVE_RBCPR_CX 169 +#define ICBID_SLAVE_RBCPR_MX 170 +#define ICBID_SLAVE_SMMU_CPP_CFG 171 +#define ICBID_SLAVE_SMMU_JPEG_CFG 172 +#define ICBID_SLAVE_SMMU_MDP_CFG 173 +#define ICBID_SLAVE_SMMU_ROTATOR_CFG 174 +#define ICBID_SLAVE_SMMU_VENUS_CFG 175 +#define ICBID_SLAVE_SMMU_VFE_CFG 176 +#define ICBID_SLAVE_SSC_CFG 177 +#define ICBID_SLAVE_VENUS_THROTTLE_CFG 178 +#define ICBID_SLAVE_VMEM 179 +#define ICBID_SLAVE_VMEM_CFG 180 +#define ICBID_SLAVE_QDSS_MPU_CFG 181 +#define ICBID_SLAVE_USB3_PHY_CFG 182 +#define ICBID_SLAVE_IPA_CFG 183 +#define ICBID_SLAVE_PCNOC_INT_2 184 +#define ICBID_SLAVE_PCNOC_INT_3 185 +#define ICBID_SLAVE_PCNOC_INT_4 186 +#define ICBID_SLAVE_PCNOC_INT_5 187 +#define ICBID_SLAVE_PCNOC_INT_6 188 +#define ICBID_SLAVE_PCNOC_S_5 189 +#define ICBID_SLAVE_QSPI 190 +#define ICBID_SLAVE_A1NOC_MS_MPU_CFG 191 +#define ICBID_SLAVE_A2NOC_MS_MPU_CFG 192 +#define ICBID_SLAVE_MODEM_Q6_SMMU_CFG 193 +#define ICBID_SLAVE_MSS_MPU_CFG 194 +#define ICBID_SLAVE_MSS_PROC_MS_MPU_CFG 195 +#define ICBID_SLAVE_SKL 196 +#define ICBID_SLAVE_SNOC_INT_2 197 +#define ICBID_SLAVE_SMMNOC_BIMC 198 +#define ICBID_SLAVE_CRVIRT_A1NOC 199 +#define ICBID_SLAVE_SGMII 200 +#define ICBID_SLAVE_QHS4_APPS 201 +#define ICBID_SLAVE_BIMC_PCNOC 202 +#define ICBID_SLAVE_PCNOC_BIMC_1 203 +#define ICBID_SLAVE_SPMI_FETCHER 204 +#define ICBID_SLAVE_MMSS_SMMU_CFG 205 +#define ICBID_SLAVE_WLAN 206 +#define ICBID_SLAVE_CRVIRT_A2NOC 207 +#define ICBID_SLAVE_CNOC_A2NOC 208 +#define ICBID_SLAVE_GLM 209 +#define 
ICBID_SLAVE_GNOC_BIMC 210 +#define ICBID_SLAVE_GNOC_SNOC 211 +#define ICBID_SLAVE_QM_CFG 212 +#define ICBID_SLAVE_TLMM_EAST 213 +#define ICBID_SLAVE_TLMM_NORTH 214 +#define ICBID_SLAVE_TLMM_WEST 215 +#define ICBID_SLAVE_LPASS_TCM 216 +#define ICBID_SLAVE_TLMM_SOUTH 217 +#define ICBID_SLAVE_TLMM_CENTER 218 +#define ICBID_SLAVE_MSS_NAV_CE_MPU_CFG 219 +#define ICBID_SLAVE_A2NOC_THROTTLE_CFG 220 +#define ICBID_SLAVE_CDSP 221 +#define ICBID_SLAVE_CDSP_SMMU_CFG 222 +#define ICBID_SLAVE_LPASS_MPU_CFG 223 +#define ICBID_SLAVE_CSI_PHY_CFG 224 +#endif diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index fee44322a69c..12d7619afa22 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -232,6 +232,7 @@ struct drm_msm_wait_fence { __u32 fence; /* in */ __u32 pad; struct drm_msm_timespec timeout; /* in */ + __u32 queueid; /* in, submitqueue id */ }; /* madvise provides a way to tell the kernel in case a buffers contents -- cgit v1.2.3 From a6e29a0eea3ccbf6fb8a908a3fc3e931f3ba2ae4 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Fri, 20 Oct 2017 11:06:58 -0600 Subject: drm/msm: Add a parameter query for the number of ringbuffers In order to manage ringbuffer priority to its fullest userspace should know how many ringbuffers it has to work with. Add a parameter to return the number of active rings. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++ include/uapi/drm/msm_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index fd0fb0568dd3..4e9caf97b20f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -57,6 +57,9 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; + case MSM_PARAM_NR_RINGS: + *value = gpu->nr_rings; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 12d7619afa22..bbbaffad772d 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -73,6 +73,7 @@ struct drm_msm_timespec { #define MSM_PARAM_MAX_FREQ 0x04 #define MSM_PARAM_TIMESTAMP 0x05 #define MSM_PARAM_GMEM_BASE 0x06 +#define MSM_PARAM_NR_RINGS 0x07 struct drm_msm_param { __u32 pipe; /* in, MSM_PIPE_x */ -- cgit v1.2.3 From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:53 -0700 Subject: bpf: rename sk_actions to align with bpf infrastructure Recent additions to support multiple programs in cgroups impose a strict requirement, "all yes is yes, any no is no". To enforce this the infrastructure requires the 'no' return code, SK_DROP in this case, to be 0. To apply these rules to SK_SKB program types the sk_actions return codes need to be adjusted. This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove SK_ABORTED to remove any chance that the API may allow aborted program flows to be passed up the stack. This would be incorrect behavior and allow programs to break existing policies. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 6 +++--- kernel/bpf/sockmap.c | 3 ++- net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d1f897..0d7948ce2128 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -575,7 +575,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -786,8 +786,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6778fb773934..66f00a2b27f4 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) preempt_enable(); skb->sk = NULL; - return rc; + return rc == SK_PASS ? + (TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP; } static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 68eaa2f81a8e..6ae94f825f72 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + /* If user passes invalid input drop the packet. */ if (unlikely(flags)) - return SK_ABORTED; + return SK_DROP; tcb->bpf.key = key; tcb->bpf.flags = flags; tcb->bpf.map = map; - return SK_REDIRECT; + return SK_PASS; } struct sock *do_sk_redirect_map(struct sk_buff *skb) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 24b35a1fd4d6..c174971afbe6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -787,8 +787,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; -- cgit v1.2.3 From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:57 +0800 Subject: sctp: fix some type cast warnings introduced since very beginning These warnings were found by running 'make C=2 M=net/sctp/'. They are there since very beginning. Note after this patch, there still one warning left in sctp_outq_flush(): sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM) Since it has been moved to sctp_stream_outq_migrate on net-next, to avoid the extra job when merging net-next to net, I will post the fix for it after the merging is done. Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/linux/sctp.h | 2 +- include/uapi/linux/sctp.h | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_sideeffect.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 09d7412e9cb0..da803dfc7a39 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -231,7 +231,7 @@ struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; - __be32 ppid; + __u32 ppid; __u8 payload[0]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6217ff8500a1..84fc2914b7fb 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -376,7 +376,7 @@ struct sctp_remote_error { __u16 sre_type; __u16 sre_flags; __u32 sre_length; - __u16 sre_error; + __be16 sre_error; sctp_assoc_t sre_assoc_id; __u8 sre_data[0]; }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7fe9e1d1b7ec..a6dfa86c0201 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb) /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { - return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20); + return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 57c55045f5a7..514465b03829 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); @@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = SCTP_PARAM_DEL_IP; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8f2762bba879..e2d9a4b49c9c 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1607,12 +1607,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.err); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.err); + subtype, chunk, cmd->obj.u32); break; case SCTP_CMD_INIT_COUNTER_INC: -- cgit v1.2.3 From 7e86a365a8319970e002f83c73701a86d95a69e6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 27 Oct 2017 19:35:30 -0400 Subject: drm/amdkfd: increase limit of signal events to 4096 per process Signed-off-by: Oded Gabbay Reviewed-by: Ben Goz Signed-off-by: Felix Kuehling --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 26283fefdf5f..731d0df722e3 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -169,7 +169,7 @@ struct 
kfd_ioctl_dbg_wave_control_args { #define KFD_IOC_WAIT_RESULT_TIMEOUT 1 #define KFD_IOC_WAIT_RESULT_FAIL 2 -#define KFD_SIGNAL_EVENT_LIMIT 256 +#define KFD_SIGNAL_EVENT_LIMIT 4096 struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ -- cgit v1.2.3
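For context on the last commit above, KFD_SIGNAL_EVENT_LIMIT is a pure uAPI policy constant: it caps how many signal events a single process may allocate through the amdkfd create-event ioctl, and the patch raises that cap from 256 to 4096. Below is a minimal user-space sketch of what the limit governs; it assumes the AMDKFD_IOC_CREATE_EVENT ioctl, the KFD_IOC_EVENT_SIGNAL event type and the /dev/kfd device node from the rest of the amdkfd uAPI, none of which appear in the patch itself.

/*
 * Hedged sketch, not part of the patch: allocate signal events until the
 * per-process limit is exhausted. AMDKFD_IOC_CREATE_EVENT, KFD_IOC_EVENT_SIGNAL
 * and /dev/kfd are assumed to come from linux/kfd_ioctl.h and the kfd driver;
 * only KFD_SIGNAL_EVENT_LIMIT is touched by the commit above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
	unsigned int created = 0;

	if (fd < 0)
		return 1;

	for (;;) {
		struct kfd_ioctl_create_event_args args;

		memset(&args, 0, sizeof(args));
		args.event_type = KFD_IOC_EVENT_SIGNAL;

		/* Fails once the per-process signal event limit (now 4096) is reached. */
		if (ioctl(fd, AMDKFD_IOC_CREATE_EVENT, &args) < 0)
			break;
		created++;
	}

	printf("created %u signal events before hitting the limit\n", created);
	close(fd);
	return 0;
}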