From 6545135a5ed2eac064f23bee3a19a81cfffbe573 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 20 Jun 2017 13:39:14 +0200 Subject: drm/qxl: fix __user annotations Drop them from u64 fields, tag local variables correctly instead. While being at it switch the code to use u64_to_user_ptr(). Signed-off-by: Gerd Hoffmann Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170620113916.6967-2-kraxel@redhat.com --- include/uapi/drm/qxl_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h index 7eef42213051..880999d2d863 100644 --- a/include/uapi/drm/qxl_drm.h +++ b/include/uapi/drm/qxl_drm.h @@ -80,8 +80,8 @@ struct drm_qxl_reloc { }; struct drm_qxl_command { - __u64 __user command; /* void* */ - __u64 __user relocs; /* struct drm_qxl_reloc* */ + __u64 command; /* void* */ + __u64 relocs; /* struct drm_qxl_reloc* */ __u32 type; __u32 command_size; __u32 relocs_num; @@ -91,7 +91,7 @@ struct drm_qxl_command { struct drm_qxl_execbuffer { __u32 flags; /* for future use */ __u32 commands_num; - __u64 __user commands; /* struct drm_qxl_command* */ + __u64 commands; /* struct drm_qxl_command* */ }; struct drm_qxl_update_area { -- cgit v1.2.3 From f30994622b2bf8e4fa224237ac65304b27a9cb6a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 11:27:17 -0700 Subject: drm/vc4: Add an ioctl for labeling GEM BOs for summary stats This has proven immensely useful for debugging memory leaks and overallocation (which is a rather serious concern on the platform, given that we typically run at about 256MB of CMA out of up to 1GB total memory, with framebuffers that are about 8MB each). The state of the art without this is to dump debug logs from every GL application, guess as to kernel allocations based on bo_stats, and try to merge that all together into a global picture of memory allocation state. 
With this, you can add a couple of calls to the debug build of the 3D driver and get a pretty detailed view of GPU memory usage from /debug/dri/0/bo_stats (or when we debug print to dmesg on allocation failure). The Mesa side currently labels at the gallium resource level (so you see that a 1920x20 pixmap has been created, presumably for the window system panel), but we could extend that to be even more useful with glObjectLabel() names being sent all the way down to the kernel. (partial) example of sorted debugfs output with Mesa labeling all resources: kernel BO cache: 16392kb BOs (3) tiling shadow 1920x1080: 8160kb BOs (1) resource 1920x1080@32/0: 8160kb BOs (1) scanout resource 1920x1080@32/0: 8100kb BOs (1) kernel: 8100kb BOs (1) v2: Use strndup_user(), use lockdep assertion instead of just a comment, fix an array[-1] reference, extend comment about name freeing. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net Reviewed-by: Chris Wilson --- drivers/gpu/drm/vc4/vc4_bo.c | 258 ++++++++++++++++++++++++++++-------- drivers/gpu/drm/vc4/vc4_drv.c | 8 +- drivers/gpu/drm/vc4/vc4_drv.h | 39 +++++- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vc4/vc4_render_cl.c | 2 +- drivers/gpu/drm/vc4/vc4_v3d.c | 3 +- include/uapi/drm/vc4_drm.h | 11 ++ 7 files changed, 257 insertions(+), 66 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 487f96412d35..f4387e1e178d 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -24,21 +24,35 @@ #include "vc4_drv.h" #include "uapi/drm/vc4_drm.h" +static const char * const bo_type_names[] = { + "kernel", + "V3D", + "V3D shader", + "dumb", + "binner", + "RCL", + "BCL", + "kernel BO cache", +}; + +static bool is_user_label(int label) +{ + return label >= VC4_BO_TYPE_COUNT; +} + static void vc4_bo_stats_dump(struct vc4_dev *vc4) { - DRM_INFO("num bos allocated: %d\n", - 
vc4->bo_stats.num_allocated); - DRM_INFO("size bos allocated: %dkb\n", - vc4->bo_stats.size_allocated / 1024); - DRM_INFO("num bos used: %d\n", - vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached); - DRM_INFO("size bos used: %dkb\n", - (vc4->bo_stats.size_allocated - - vc4->bo_stats.size_cached) / 1024); - DRM_INFO("num bos cached: %d\n", - vc4->bo_stats.num_cached); - DRM_INFO("size bos cached: %dkb\n", - vc4->bo_stats.size_cached / 1024); + int i; + + for (i = 0; i < vc4->num_labels; i++) { + if (!vc4->bo_labels[i].num_allocated) + continue; + + DRM_INFO("%30s: %6dkb BOs (%d)\n", + vc4->bo_labels[i].name, + vc4->bo_labels[i].size_allocated / 1024, + vc4->bo_labels[i].num_allocated); + } } #ifdef CONFIG_DEBUG_FS @@ -47,30 +61,103 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_bo_stats stats; + int i; - /* Take a snapshot of the current stats with the lock held. */ mutex_lock(&vc4->bo_lock); - stats = vc4->bo_stats; + for (i = 0; i < vc4->num_labels; i++) { + if (!vc4->bo_labels[i].num_allocated) + continue; + + seq_printf(m, "%30s: %6dkb BOs (%d)\n", + vc4->bo_labels[i].name, + vc4->bo_labels[i].size_allocated / 1024, + vc4->bo_labels[i].num_allocated); + } mutex_unlock(&vc4->bo_lock); - seq_printf(m, "num bos allocated: %d\n", - stats.num_allocated); - seq_printf(m, "size bos allocated: %dkb\n", - stats.size_allocated / 1024); - seq_printf(m, "num bos used: %d\n", - stats.num_allocated - stats.num_cached); - seq_printf(m, "size bos used: %dkb\n", - (stats.size_allocated - stats.size_cached) / 1024); - seq_printf(m, "num bos cached: %d\n", - stats.num_cached); - seq_printf(m, "size bos cached: %dkb\n", - stats.size_cached / 1024); - return 0; } #endif +/* Takes ownership of *name and returns the appropriate slot for it in + * the bo_labels[] array, extending it as necessary. 
+ * + * This is inefficient and could use a hash table instead of walking + * an array and strcmp()ing. However, the assumption is that user + * labeling will be infrequent (scanout buffers and other long-lived + * objects, or debug driver builds), so we can live with it for now. + */ +static int vc4_get_user_label(struct vc4_dev *vc4, const char *name) +{ + int i; + int free_slot = -1; + + for (i = 0; i < vc4->num_labels; i++) { + if (!vc4->bo_labels[i].name) { + free_slot = i; + } else if (strcmp(vc4->bo_labels[i].name, name) == 0) { + kfree(name); + return i; + } + } + + if (free_slot != -1) { + WARN_ON(vc4->bo_labels[free_slot].num_allocated != 0); + vc4->bo_labels[free_slot].name = name; + return free_slot; + } else { + u32 new_label_count = vc4->num_labels + 1; + struct vc4_label *new_labels = + krealloc(vc4->bo_labels, + new_label_count * sizeof(*new_labels), + GFP_KERNEL); + + if (!new_labels) { + kfree(name); + return -1; + } + + free_slot = vc4->num_labels; + vc4->bo_labels = new_labels; + vc4->num_labels = new_label_count; + + vc4->bo_labels[free_slot].name = name; + vc4->bo_labels[free_slot].num_allocated = 0; + vc4->bo_labels[free_slot].size_allocated = 0; + + return free_slot; + } +} + +static void vc4_bo_set_label(struct drm_gem_object *gem_obj, int label) +{ + struct vc4_bo *bo = to_vc4_bo(gem_obj); + struct vc4_dev *vc4 = to_vc4_dev(gem_obj->dev); + + lockdep_assert_held(&vc4->bo_lock); + + if (label != -1) { + vc4->bo_labels[label].num_allocated++; + vc4->bo_labels[label].size_allocated += gem_obj->size; + } + + vc4->bo_labels[bo->label].num_allocated--; + vc4->bo_labels[bo->label].size_allocated -= gem_obj->size; + + if (vc4->bo_labels[bo->label].num_allocated == 0 && + is_user_label(bo->label)) { + /* Free user BO label slots on last unreference. + * Slots are just where we track the stats for a given + * name, and once a name is unused we can reuse that + * slot. 
+ */ + kfree(vc4->bo_labels[bo->label].name); + vc4->bo_labels[bo->label].name = NULL; + } + + bo->label = label; +} + static uint32_t bo_page_index(size_t size) { return (size / PAGE_SIZE) - 1; @@ -80,7 +167,8 @@ static uint32_t bo_page_index(size_t size) static void vc4_bo_destroy(struct vc4_bo *bo) { struct drm_gem_object *obj = &bo->base.base; - struct vc4_dev *vc4 = to_vc4_dev(obj->dev); + + vc4_bo_set_label(obj, -1); if (bo->validated_shader) { kfree(bo->validated_shader->texture_samples); @@ -88,9 +176,6 @@ static void vc4_bo_destroy(struct vc4_bo *bo) bo->validated_shader = NULL; } - vc4->bo_stats.num_allocated--; - vc4->bo_stats.size_allocated -= obj->size; - reservation_object_fini(&bo->_resv); drm_gem_cma_free_object(obj); @@ -99,12 +184,6 @@ static void vc4_bo_destroy(struct vc4_bo *bo) /* Must be called with bo_lock held. */ static void vc4_bo_remove_from_cache(struct vc4_bo *bo) { - struct drm_gem_object *obj = &bo->base.base; - struct vc4_dev *vc4 = to_vc4_dev(obj->dev); - - vc4->bo_stats.num_cached--; - vc4->bo_stats.size_cached -= obj->size; - list_del(&bo->unref_head); list_del(&bo->size_head); } @@ -165,7 +244,8 @@ static void vc4_bo_cache_purge(struct drm_device *dev) } static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, - uint32_t size) + uint32_t size, + enum vc4_kernel_bo_type type) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t page_index = bo_page_index(size); @@ -186,6 +266,8 @@ static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, kref_init(&bo->base.base.refcount); out: + if (bo) + vc4_bo_set_label(&bo->base.base, type); mutex_unlock(&vc4->bo_lock); return bo; } @@ -208,8 +290,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) return ERR_PTR(-ENOMEM); mutex_lock(&vc4->bo_lock); - vc4->bo_stats.num_allocated++; - vc4->bo_stats.size_allocated += size; + bo->label = VC4_BO_TYPE_KERNEL; + vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++; + 
vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size; mutex_unlock(&vc4->bo_lock); bo->resv = &bo->_resv; reservation_object_init(bo->resv); @@ -218,7 +301,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) } struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, - bool allow_unzeroed) + bool allow_unzeroed, enum vc4_kernel_bo_type type) { size_t size = roundup(unaligned_size, PAGE_SIZE); struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -229,7 +312,7 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-EINVAL); /* First, try to get a vc4_bo from the kernel BO cache. */ - bo = vc4_bo_get_from_cache(dev, size); + bo = vc4_bo_get_from_cache(dev, size, type); if (bo) { if (!allow_unzeroed) memset(bo->base.vaddr, 0, bo->base.base.size); @@ -251,7 +334,13 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-ENOMEM); } } - return to_vc4_bo(&cma_obj->base); + bo = to_vc4_bo(&cma_obj->base); + + mutex_lock(&vc4->bo_lock); + vc4_bo_set_label(&cma_obj->base, type); + mutex_unlock(&vc4->bo_lock); + + return bo; } int vc4_dumb_create(struct drm_file *file_priv, @@ -268,7 +357,7 @@ int vc4_dumb_create(struct drm_file *file_priv, if (args->size < args->pitch * args->height) args->size = args->pitch * args->height; - bo = vc4_bo_create(dev, args->size, false); + bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -348,8 +437,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) list_add(&bo->size_head, cache_list); list_add(&bo->unref_head, &vc4->bo_cache.time_list); - vc4->bo_stats.num_cached++; - vc4->bo_stats.size_cached += gem_bo->size; + vc4_bo_set_label(&bo->base.base, VC4_BO_TYPE_KERNEL_CACHE); vc4_bo_cache_free_old(dev); @@ -483,7 +571,7 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, * We can't allocate from the BO cache, because the BOs don't * get zeroed, and that might 
leak data between users. */ - bo = vc4_bo_create(dev, args->size, false); + bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -536,7 +624,7 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - bo = vc4_bo_create(dev, args->size, true); + bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -651,9 +739,24 @@ int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, return 0; } -void vc4_bo_cache_init(struct drm_device *dev) +int vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + int i; + + /* Create the initial set of BO labels that the kernel will + * use. This lets us avoid a bunch of string reallocation in + * the kernel's draw and BO allocation paths. + */ + vc4->bo_labels = kcalloc(VC4_BO_TYPE_COUNT, sizeof(*vc4->bo_labels), + GFP_KERNEL); + if (!vc4->bo_labels) + return -ENOMEM; + vc4->num_labels = VC4_BO_TYPE_COUNT; + + BUILD_BUG_ON(ARRAY_SIZE(bo_type_names) != VC4_BO_TYPE_COUNT); + for (i = 0; i < VC4_BO_TYPE_COUNT; i++) + vc4->bo_labels[i].name = bo_type_names[i]; mutex_init(&vc4->bo_lock); @@ -663,19 +766,66 @@ void vc4_bo_cache_init(struct drm_device *dev) setup_timer(&vc4->bo_cache.time_timer, vc4_bo_cache_time_timer, (unsigned long)dev); + + return 0; } void vc4_bo_cache_destroy(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + int i; del_timer(&vc4->bo_cache.time_timer); cancel_work_sync(&vc4->bo_cache.time_work); vc4_bo_cache_purge(dev); - if (vc4->bo_stats.num_allocated) { - DRM_ERROR("Destroying BO cache while BOs still allocated:\n"); - vc4_bo_stats_dump(vc4); + for (i = 0; i < vc4->num_labels; i++) { + if (vc4->bo_labels[i].num_allocated) { + DRM_ERROR("Destroying BO cache with %d %s " + "BOs still allocated\n", + vc4->bo_labels[i].num_allocated, + vc4->bo_labels[i].name); + } + + if (is_user_label(i)) + kfree(vc4->bo_labels[i].name); } + kfree(vc4->bo_labels); 
+} + +int vc4_label_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_label_bo *args = data; + char *name; + struct drm_gem_object *gem_obj; + int ret = 0, label; + + if (!args->len) + return -EINVAL; + + name = strndup_user(u64_to_user_ptr(args->name), args->len + 1); + if (IS_ERR(name)) + return PTR_ERR(name); + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + kfree(name); + return -ENOENT; + } + + mutex_lock(&vc4->bo_lock); + label = vc4_get_user_label(vc4, name); + if (label != -1) + vc4_bo_set_label(gem_obj, label); + else + ret = -ENOMEM; + mutex_unlock(&vc4->bo_lock); + + drm_gem_object_unreference_unlocked(gem_obj); + + return ret; } diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index c6b487c3d2b7..75c1f50a7b5d 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -140,6 +140,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -257,7 +258,9 @@ static int vc4_drm_bind(struct device *dev) vc4->dev = drm; drm->dev_private = vc4; - vc4_bo_cache_init(drm); + ret = vc4_bo_cache_init(drm); + if (ret) + goto dev_unref; drm_mode_config_init(drm); @@ -281,8 +284,9 @@ unbind_all: component_unbind_all(dev, drm); gem_destroy: vc4_gem_destroy(drm); - drm_dev_unref(drm); vc4_bo_cache_destroy(drm); +dev_unref: + drm_dev_unref(drm); return ret; } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 1047953216a8..87f2d8e5c134 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h 
+++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -11,6 +11,24 @@ #include #include +/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to + * this. + */ +enum vc4_kernel_bo_type { + /* Any kernel allocation (gem_create_object hook) before it + * gets another type set. + */ + VC4_BO_TYPE_KERNEL, + VC4_BO_TYPE_V3D, + VC4_BO_TYPE_V3D_SHADER, + VC4_BO_TYPE_DUMB, + VC4_BO_TYPE_BIN, + VC4_BO_TYPE_RCL, + VC4_BO_TYPE_BCL, + VC4_BO_TYPE_KERNEL_CACHE, + VC4_BO_TYPE_COUNT +}; + struct vc4_dev { struct drm_device *dev; @@ -46,14 +64,14 @@ struct vc4_dev { struct timer_list time_timer; } bo_cache; - struct vc4_bo_stats { + u32 num_labels; + struct vc4_label { + const char *name; u32 num_allocated; u32 size_allocated; - u32 num_cached; - u32 size_cached; - } bo_stats; + } *bo_labels; - /* Protects bo_cache and the BO stats. */ + /* Protects bo_cache and bo_labels. */ struct mutex bo_lock; uint64_t dma_fence_context; @@ -169,6 +187,11 @@ struct vc4_bo { /* normally (resv == &_resv) except for imported bo's */ struct reservation_object *resv; struct reservation_object _resv; + + /* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i + * for user-allocated labels. 
+ */ + int label; }; static inline struct vc4_bo * @@ -460,7 +483,7 @@ struct vc4_validated_shader_info { struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); void vc4_free_object(struct drm_gem_object *gem_obj); struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, - bool from_cache); + bool from_cache, enum vc4_kernel_bo_type type); int vc4_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); @@ -478,6 +501,8 @@ int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_label_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); @@ -485,7 +510,7 @@ struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); void *vc4_prime_vmap(struct drm_gem_object *obj); -void vc4_bo_cache_init(struct drm_device *dev); +int vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 595f93f57821..209fccd0d3b4 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -774,7 +774,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } - bo = vc4_bo_create(dev, exec_size, true); + bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL); if (IS_ERR(bo)) { DRM_ERROR("Couldn't allocate BO for binning\n"); ret = PTR_ERR(bo); diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 5dc19429d4ae..4a8051532f00 100644 
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -320,7 +320,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, size += xtiles * ytiles * loop_body_size; - setup->rcl = &vc4_bo_create(dev, size, true)->base; + setup->rcl = &vc4_bo_create(dev, size, true, VC4_BO_TYPE_RCL)->base; if (IS_ERR(setup->rcl)) return PTR_ERR(setup->rcl); list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 8c723da71f66..622cd43840b8 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -236,7 +236,8 @@ vc4_allocate_bin_bo(struct drm_device *drm) INIT_LIST_HEAD(&list); while (true) { - struct vc4_bo *bo = vc4_bo_create(drm, size, true); + struct vc4_bo *bo = vc4_bo_create(drm, size, true, + VC4_BO_TYPE_BIN); if (IS_ERR(bo)) { ret = PTR_ERR(bo); diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 6ac4c5c014cb..551628e571f9 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -40,6 +40,7 @@ extern "C" { #define DRM_VC4_GET_PARAM 0x07 #define DRM_VC4_SET_TILING 0x08 #define DRM_VC4_GET_TILING 0x09 +#define DRM_VC4_LABEL_BO 0x0a #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -51,6 +52,7 @@ extern "C" { #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) #define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling) #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) +#define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not 
present. */ @@ -311,6 +313,15 @@ struct drm_vc4_set_tiling { __u64 modifier; }; +/** + * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes. + */ +struct drm_vc4_label_bo { + __u32 handle; + __u32 len; + __u64 name; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From e6fc3b68558e4c6d8d160b5daf2511b99afa8814 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jul 2017 20:46:38 -0700 Subject: drm: Plumb modifiers through plane init This is the plumbing for supporting fb modifiers on planes. Modifiers have already been introduced to some extent, but this series will extend this to allow querying modifiers per plane. Based on this, the client can enable optimal modifications for framebuffers. This patch simply allows the DRM drivers to initialize their list of supported modifiers upon initializing the plane. v2: A minor addition from Daniel v3: * Updated commit message * s/INVALID/DRM_FORMAT_MOD_INVALID (Liviu) * Remove some excess newlines (Liviu) * Update comment for > 64 modifiers (Liviu) v4: Minor comment adjustments (Liviu) v5: Some new platforms added due to rebase v6: Add some missed plane inits (or maybe they're new - who knows at this point) (Daniel) Signed-off-by: Ben Widawsky Reviewed-by: Daniel Stone (v2) Reviewed-by: Liviu Dudau Signed-off-by: Daniel Stone --- drivers/gpu/drm/arc/arcpgu_crtc.c | 1 + drivers/gpu/drm/arm/hdlcd_crtc.c | 1 + drivers/gpu/drm/arm/malidp_planes.c | 2 +- drivers/gpu/drm/armada/armada_crtc.c | 1 + drivers/gpu/drm/armada/armada_overlay.c | 1 + drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 3 ++- drivers/gpu/drm/drm_modeset_helper.c | 1 + drivers/gpu/drm/drm_plane.c | 36 ++++++++++++++++++++++++- drivers/gpu/drm/drm_simple_kms_helper.c | 3 +++ drivers/gpu/drm/exynos/exynos_drm_plane.c | 2 +- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c | 2 +- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c | 1 + drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 5 +++- 
drivers/gpu/drm/i915/intel_sprite.c | 4 +-- drivers/gpu/drm/imx/ipuv3-plane.c | 4 +-- drivers/gpu/drm/mediatek/mtk_drm_plane.c | 2 +- drivers/gpu/drm/meson/meson_plane.c | 1 + drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 2 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 +-- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 2 +- drivers/gpu/drm/nouveau/nv50_display.c | 5 ++-- drivers/gpu/drm/omapdrm/omap_plane.c | 2 +- drivers/gpu/drm/pl111/pl111_display.c | 2 +- drivers/gpu/drm/qxl/qxl_display.c | 2 +- drivers/gpu/drm/rcar-du/rcar_du_plane.c | 4 +-- drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 4 +-- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 +-- drivers/gpu/drm/sti/sti_cursor.c | 2 +- drivers/gpu/drm/sti/sti_gdp.c | 2 +- drivers/gpu/drm/sti/sti_hqvdp.c | 2 +- drivers/gpu/drm/stm/ltdc.c | 2 +- drivers/gpu/drm/sun4i/sun4i_layer.c | 2 +- drivers/gpu/drm/sun4i/sun8i_layer.c | 2 +- drivers/gpu/drm/tegra/dc.c | 12 ++++----- drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c | 2 +- drivers/gpu/drm/vc4/vc4_plane.c | 2 +- drivers/gpu/drm/virtio/virtgpu_plane.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 4 +-- drivers/gpu/drm/zte/zx_plane.c | 2 +- include/drm/drm_plane.h | 22 ++++++++++++++- include/drm/drm_simple_kms_helper.h | 1 + include/uapi/drm/drm_fourcc.h | 11 ++++++++ 45 files changed, 131 insertions(+), 50 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/arc/arcpgu_crtc.c b/drivers/gpu/drm/arc/arcpgu_crtc.c index 1859dd3ad622..799416651f2f 100644 --- a/drivers/gpu/drm/arc/arcpgu_crtc.c +++ b/drivers/gpu/drm/arc/arcpgu_crtc.c @@ -217,6 +217,7 @@ static struct drm_plane *arc_pgu_plane_init(struct drm_device *drm) ret = drm_universal_plane_init(drm, plane, 0xff, &arc_pgu_plane_funcs, formats, ARRAY_SIZE(formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 
16e1e20cf04c..72b22b805412 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -315,6 +315,7 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) ret = drm_universal_plane_init(drm, plane, 0xff, &hdlcd_plane_funcs, formats, ARRAY_SIZE(formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { return ERR_PTR(ret); diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 600fa7bd7f52..60402e27882f 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -398,7 +398,7 @@ int malidp_de_planes_init(struct drm_device *drm) DRM_PLANE_TYPE_OVERLAY; ret = drm_universal_plane_init(drm, &plane->base, crtcs, &malidp_de_plane_funcs, formats, - n, plane_type, NULL); + n, NULL, plane_type, NULL); if (ret < 0) goto cleanup; diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 1ffba91fbaae..1d01ad24fe52 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -1269,6 +1269,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, &armada_primary_plane_funcs, armada_primary_formats, ARRAY_SIZE(armada_primary_formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 677b44f3534b..edc44910d79f 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -460,6 +460,7 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) &armada_ovl_plane_funcs, armada_ovl_formats, ARRAY_SIZE(armada_ovl_formats), + NULL, DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { kfree(dplane); diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index b5bd9b005225..ba532bf101e0 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ 
b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -1087,7 +1087,8 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, ret = drm_universal_plane_init(dev, &plane->base, 0, &layer_plane_funcs, desc->formats->formats, - desc->formats->nformats, type, NULL); + desc->formats->nformats, + NULL, type, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index 2b33825f2f93..9cb1eede0b4d 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -124,6 +124,7 @@ static struct drm_plane *create_primary_plane(struct drm_device *dev) &drm_primary_helper_funcs, safe_modeset_formats, ARRAY_SIZE(safe_modeset_formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 5dc8c4350602..d3fc561d7b48 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -70,6 +70,8 @@ static unsigned int drm_num_planes(struct drm_device *dev) * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats + * @format_modifiers: array of struct drm_format modifiers terminated by + * DRM_FORMAT_MOD_INVALID * @type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * @@ -82,10 +84,12 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, + const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...) 
{ struct drm_mode_config *config = &dev->mode_config; + unsigned int format_modifier_count = 0; int ret; ret = drm_mode_object_add(dev, &plane->base, DRM_MODE_OBJECT_PLANE); @@ -105,6 +109,31 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, return -ENOMEM; } + /* + * First driver to need more than 64 formats needs to fix this. Each + * format is encoded as a bit and the current code only supports a u64. + */ + if (WARN_ON(format_count > 64)) + return -EINVAL; + + if (format_modifiers) { + const uint64_t *temp_modifiers = format_modifiers; + while (*temp_modifiers++ != DRM_FORMAT_MOD_INVALID) + format_modifier_count++; + } + + plane->modifier_count = format_modifier_count; + plane->modifiers = kmalloc_array(format_modifier_count, + sizeof(format_modifiers[0]), + GFP_KERNEL); + + if (format_modifier_count && !plane->modifiers) { + DRM_DEBUG_KMS("out of memory when allocating plane\n"); + kfree(plane->format_types); + drm_mode_object_unregister(dev, &plane->base); + return -ENOMEM; + } + if (name) { va_list ap; @@ -117,12 +146,15 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, } if (!plane->name) { kfree(plane->format_types); + kfree(plane->modifiers); drm_mode_object_unregister(dev, &plane->base); return -ENOMEM; } memcpy(plane->format_types, formats, format_count * sizeof(uint32_t)); plane->format_count = format_count; + memcpy(plane->modifiers, format_modifiers, + format_modifier_count * sizeof(format_modifiers[0])); plane->possible_crtcs = possible_crtcs; plane->type = type; @@ -205,7 +237,8 @@ int drm_plane_init(struct drm_device *dev, struct drm_plane *plane, type = is_primary ? 
DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; return drm_universal_plane_init(dev, plane, possible_crtcs, funcs, - formats, format_count, type, NULL); + formats, format_count, + NULL, type, NULL); } EXPORT_SYMBOL(drm_plane_init); @@ -224,6 +257,7 @@ void drm_plane_cleanup(struct drm_plane *plane) drm_modeset_lock_fini(&plane->mutex); kfree(plane->format_types); + kfree(plane->modifiers); drm_mode_object_unregister(dev, &plane->base); BUG_ON(list_empty(&plane->head)); diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c index 39c203ad59db..dc9fd109de14 100644 --- a/drivers/gpu/drm/drm_simple_kms_helper.c +++ b/drivers/gpu/drm/drm_simple_kms_helper.c @@ -199,6 +199,7 @@ EXPORT_SYMBOL(drm_simple_display_pipe_attach_bridge); * @funcs: callbacks for the display pipe (optional) * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats + * @format_modifiers: array of formats modifiers * @connector: connector to attach and register (optional) * * Sets up a display pipeline which consist of a really simple @@ -219,6 +220,7 @@ int drm_simple_display_pipe_init(struct drm_device *dev, struct drm_simple_display_pipe *pipe, const struct drm_simple_display_pipe_funcs *funcs, const uint32_t *formats, unsigned int format_count, + const uint64_t *format_modifiers, struct drm_connector *connector) { struct drm_encoder *encoder = &pipe->encoder; @@ -233,6 +235,7 @@ int drm_simple_display_pipe_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, 0, &drm_simple_kms_plane_funcs, formats, format_count, + format_modifiers, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 611b6fd65433..e5099bc9f21a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -283,7 +283,7 @@ int exynos_plane_init(struct drm_device *dev, 
&exynos_plane_funcs, config->pixel_formats, config->num_pixel_formats, - config->type, NULL); + NULL, config->type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 0a20723aa6e1..9554b245746e 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -224,7 +224,7 @@ struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev) &fsl_dcu_drm_plane_funcs, fsl_dcu_drm_plane_formats, ARRAY_SIZE(fsl_dcu_drm_plane_formats), - DRM_PLANE_TYPE_PRIMARY, NULL); + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); primary = NULL; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c index 54a4542a40f1..bec0a8726c6b 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c @@ -181,6 +181,7 @@ static struct drm_plane *hibmc_plane_init(struct hibmc_drm_private *priv) ret = drm_universal_plane_init(dev, plane, 1, &hibmc_plane_funcs, channel_formats1, ARRAY_SIZE(channel_formats1), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 706efd0c4190..9882ebd8751c 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -910,7 +910,7 @@ static int ade_plane_init(struct drm_device *dev, struct ade_plane *aplane, return ret; ret = drm_universal_plane_init(dev, &aplane->base, 1, &ade_plane_funcs, - fmts, fmts_cnt, type, NULL); + fmts, fmts_cnt, NULL, type, NULL); if (ret) { DRM_ERROR("fail to init plane, ch=%d\n", aplane->ch); return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e92fd14c06c7..8998d6a27288 100644 --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13809,18 +13809,21 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, 0, &intel_plane_funcs, intel_primary_formats, num_formats, + NULL, DRM_PLANE_TYPE_PRIMARY, "plane 1%c", pipe_name(pipe)); else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, 0, &intel_plane_funcs, intel_primary_formats, num_formats, + NULL, DRM_PLANE_TYPE_PRIMARY, "primary %c", pipe_name(pipe)); else ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, 0, &intel_plane_funcs, intel_primary_formats, num_formats, + NULL, DRM_PLANE_TYPE_PRIMARY, "plane %c", plane_name(primary->plane)); if (ret) @@ -13906,7 +13909,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, 0, &intel_cursor_plane_funcs, intel_cursor_formats, ARRAY_SIZE(intel_cursor_formats), - DRM_PLANE_TYPE_CURSOR, + NULL, DRM_PLANE_TYPE_CURSOR, "cursor %c", pipe_name(pipe)); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 94f9a1332dbf..4c6b387fa9dc 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1171,13 +1171,13 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, ret = drm_universal_plane_init(&dev_priv->drm, &intel_plane->base, possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, - DRM_PLANE_TYPE_OVERLAY, + NULL, DRM_PLANE_TYPE_OVERLAY, "plane %d%c", plane + 2, pipe_name(pipe)); else ret = drm_universal_plane_init(&dev_priv->drm, &intel_plane->base, possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, - DRM_PLANE_TYPE_OVERLAY, + NULL, DRM_PLANE_TYPE_OVERLAY, "sprite %c", sprite_name(pipe, plane)); if (ret) goto fail; diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 
0847cc0d96a4..debde2dae7bf 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -718,8 +718,8 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs, &ipu_plane_funcs, ipu_plane_formats, - ARRAY_SIZE(ipu_plane_formats), type, - NULL); + ARRAY_SIZE(ipu_plane_formats), + NULL, type, NULL); if (ret) { DRM_ERROR("failed to initialize plane\n"); kfree(ipu_plane); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 1a59b9ab4aa8..6f121891430f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -175,7 +175,7 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, - ARRAY_SIZE(formats), type, NULL); + ARRAY_SIZE(formats), NULL, type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c index a32d3b6e2e12..17e96fa47868 100644 --- a/drivers/gpu/drm/meson/meson_plane.c +++ b/drivers/gpu/drm/meson/meson_plane.c @@ -223,6 +223,7 @@ int meson_plane_create(struct meson_drm *priv) &meson_plane_funcs, supported_drm_formats, ARRAY_SIZE(supported_drm_formats), + NULL, DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane"); drm_plane_helper_add(plane, &meson_plane_helper_funcs); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index a20e3d644523..7a1ad3af08e3 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -401,7 +401,7 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev, type = private_plane ? 
DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; ret = drm_universal_plane_init(dev, plane, 0xff, &mdp4_plane_funcs, mdp4_plane->formats, mdp4_plane->nformats, - type, NULL); + NULL, type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index fe3a4de1a433..61dfea791f3f 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -1139,12 +1139,12 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_cursor_plane_funcs, mdp5_plane->formats, mdp5_plane->nformats, - type, NULL); + NULL, type, NULL); else ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, mdp5_plane->formats, mdp5_plane->nformats, - type, NULL); + NULL, type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index a34f41ce3599..93c38eb6d187 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -190,7 +190,7 @@ static int mxsfb_load(struct drm_device *drm, unsigned long flags) } ret = drm_simple_display_pipe_init(drm, &mxsfb->pipe, &mxsfb_funcs, - mxsfb_formats, ARRAY_SIZE(mxsfb_formats), + mxsfb_formats, ARRAY_SIZE(mxsfb_formats), NULL, &mxsfb->connector); if (ret < 0) { dev_err(drm->dev, "Cannot setup simple display pipe\n"); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 7abfb561b00c..bd1199b67eb4 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1083,8 +1083,9 @@ nv50_wndw_ctor(const struct nv50_wndw_func *func, struct drm_device *dev, wndw->func = func; wndw->dmac = dmac; - ret = drm_universal_plane_init(dev, &wndw->plane, 0, &nv50_wndw, format, - nformat, type, "%s-%d", name, index); + ret = drm_universal_plane_init(dev, &wndw->plane, 0, &nv50_wndw, + format, nformat, NULL, + type, "%s-%d", name, index); 
if (ret) return ret; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 2160f64548e0..b92a8f1d0155 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -291,7 +291,7 @@ struct drm_plane *omap_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, possible_crtcs, &omap_plane_funcs, formats, - nformats, type, NULL); + nformats, NULL, type, NULL); if (ret < 0) goto error; diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index c6ca4f1bbd49..f0139fa58d55 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -457,7 +457,7 @@ int pl111_display_init(struct drm_device *drm) ret = drm_simple_display_pipe_init(drm, &priv->pipe, &pl111_display_funcs, formats, ARRAY_SIZE(formats), - &priv->connector.connector); + NULL, &priv->connector.connector); if (ret) return ret; diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 5eeae89c138d..14c5613b4388 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -784,7 +784,7 @@ static struct drm_plane *qxl_create_plane(struct qxl_device *qdev, err = drm_universal_plane_init(&qdev->ddev, plane, possible_crtcs, funcs, formats, num_formats, - type, NULL); + NULL, type, NULL); if (err) goto free_plane; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index dcde6288da6c..2b02eccbfb70 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -743,8 +743,8 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) ret = drm_universal_plane_init(rcdu->ddev, &plane->plane, crtcs, &rcar_du_plane_funcs, formats, - ARRAY_SIZE(formats), type, - NULL); + ARRAY_SIZE(formats), + NULL, type, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c 
b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index f870445ebc8d..7362d80d236b 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -439,8 +439,8 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp) 1 << vsp->index, &rcar_du_vsp_plane_funcs, formats_kms, - ARRAY_SIZE(formats_kms), type, - NULL); + ARRAY_SIZE(formats_kms), + NULL, type, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index ee4a12da78ab..ab18659e2a6f 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1288,7 +1288,7 @@ static int vop_create_crtc(struct vop *vop) 0, &vop_plane_funcs, win_data->phy->data_formats, win_data->phy->nformats, - win_data->type, NULL); + NULL, win_data->type, NULL); if (ret) { DRM_DEV_ERROR(vop->dev, "failed to init plane %d\n", ret); @@ -1327,7 +1327,7 @@ static int vop_create_crtc(struct vop *vop) &vop_plane_funcs, win_data->phy->data_formats, win_data->phy->nformats, - win_data->type, NULL); + NULL, win_data->type, NULL); if (ret) { DRM_DEV_ERROR(vop->dev, "failed to init overlay %d\n", ret); diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c index 5b3a41f74f21..f31cbe898eb3 100644 --- a/drivers/gpu/drm/sti/sti_cursor.c +++ b/drivers/gpu/drm/sti/sti_cursor.c @@ -392,7 +392,7 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev, &sti_cursor_plane_helpers_funcs, cursor_supported_formats, ARRAY_SIZE(cursor_supported_formats), - DRM_PLANE_TYPE_CURSOR, NULL); + NULL, DRM_PLANE_TYPE_CURSOR, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); goto err_plane; diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c index 5ee0503945c8..dceedbe702d7 100644 --- a/drivers/gpu/drm/sti/sti_gdp.c +++ b/drivers/gpu/drm/sti/sti_gdp.c @@ -931,7 +931,7 @@ struct drm_plane *sti_gdp_create(struct drm_device *drm_dev, 
&sti_gdp_plane_helpers_funcs, gdp_supported_formats, ARRAY_SIZE(gdp_supported_formats), - type, NULL); + NULL, type, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); goto err; diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index 53a46dda8bd5..f11230853116 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -1298,7 +1298,7 @@ static struct drm_plane *sti_hqvdp_create(struct drm_device *drm_dev, &sti_hqvdp_plane_helpers_funcs, hqvdp_supported_formats, ARRAY_SIZE(hqvdp_supported_formats), - DRM_PLANE_TYPE_OVERLAY, NULL); + NULL, DRM_PLANE_TYPE_OVERLAY, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); return NULL; diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 04cc66d6b96f..ae9eb0fff06b 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -735,7 +735,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, ret = drm_universal_plane_init(ddev, plane, possible_crtcs, &ltdc_plane_funcs, formats, nb_fmt, - type, NULL); + NULL, type, NULL); if (ret < 0) return 0; diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index ead4f9d4c1ee..d45f3a1a0a29 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -115,7 +115,7 @@ static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm, ret = drm_universal_plane_init(drm, &layer->plane, 0, &sun4i_backend_layer_funcs, plane->formats, plane->nformats, - plane->type, NULL); + NULL, plane->type, NULL); if (ret) { dev_err(drm->dev, "Couldn't initialize layer\n"); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/sun4i/sun8i_layer.c b/drivers/gpu/drm/sun4i/sun8i_layer.c index e627eeece658..23810ff72684 100644 --- a/drivers/gpu/drm/sun4i/sun8i_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_layer.c @@ -90,7 +90,7 @@ static struct sun8i_layer *sun8i_layer_init_one(struct drm_device *drm, ret
= drm_universal_plane_init(drm, &layer->plane, 0, &sun8i_mixer_layer_funcs, plane->formats, plane->nformats, - plane->type, NULL); + NULL, plane->type, NULL); if (ret) { dev_err(drm->dev, "Couldn't initialize layer\n"); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 0cb9b90e2e68..4df39112e38e 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -678,8 +678,8 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_primary_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_PRIMARY, - NULL); + num_formats, NULL, + DRM_PLANE_TYPE_PRIMARY, NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); @@ -844,8 +844,8 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe, &tegra_cursor_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_CURSOR, - NULL); + num_formats, NULL, + DRM_PLANE_TYPE_CURSOR, NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); @@ -906,8 +906,8 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe, &tegra_overlay_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_OVERLAY, - NULL); + num_formats, NULL, + DRM_PLANE_TYPE_OVERLAY, NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c index ec43fb7ad9e4..26823a4a07d8 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c @@ -225,7 +225,7 @@ tinydrm_display_pipe_init(struct tinydrm_device *tdev, return PTR_ERR(connector); ret = drm_simple_display_pipe_init(drm, &tdev->pipe, funcs, formats, - format_count, connector); + format_count, NULL, connector); if (ret) return ret; diff --git 
a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 8853e9a4f005..2968b3ebb895 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -902,7 +902,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, 0, &vc4_plane_funcs, formats, num_formats, - type, NULL); + NULL, type, NULL); drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index adcdbd0abef6..71ba455af915 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -298,7 +298,7 @@ struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev, ret = drm_universal_plane_init(dev, plane, 1 << index, &virtio_gpu_plane_funcs, formats, nformats, - type, NULL); + NULL, type, NULL); if (ret) goto err_plane_init; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 6391069498d6..b8a09807c5de 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -428,7 +428,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_ldu_plane_funcs, vmw_primary_plane_formats, ARRAY_SIZE(vmw_primary_plane_formats), - DRM_PLANE_TYPE_PRIMARY, NULL); + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { DRM_ERROR("Failed to initialize primary plane"); goto err_free; @@ -443,7 +443,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_ldu_cursor_funcs, vmw_cursor_plane_formats, ARRAY_SIZE(vmw_cursor_plane_formats), - DRM_PLANE_TYPE_CURSOR, NULL); + NULL, DRM_PLANE_TYPE_CURSOR, NULL); if (ret) { DRM_ERROR("Failed to initialize cursor plane"); drm_plane_cleanup(&ldu->base.primary); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 854403509216..d1552d3e0652 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ 
b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -624,7 +624,7 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_sou_plane_funcs, vmw_primary_plane_formats, ARRAY_SIZE(vmw_primary_plane_formats), - DRM_PLANE_TYPE_PRIMARY, NULL); + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { DRM_ERROR("Failed to initialize primary plane"); goto err_free; @@ -639,7 +639,7 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_sou_cursor_funcs, vmw_cursor_plane_formats, ARRAY_SIZE(vmw_cursor_plane_formats), - DRM_PLANE_TYPE_CURSOR, NULL); + NULL, DRM_PLANE_TYPE_CURSOR, NULL); if (ret) { DRM_ERROR("Failed to initialize cursor plane"); drm_plane_cleanup(&sou->base.primary); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index ed9404a7f457..c4de4ad0543b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1475,7 +1475,7 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_stdu_plane_funcs, vmw_primary_plane_formats, ARRAY_SIZE(vmw_primary_plane_formats), - DRM_PLANE_TYPE_PRIMARY, NULL); + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { DRM_ERROR("Failed to initialize primary plane"); goto err_free; @@ -1490,7 +1490,7 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) 0, &vmw_stdu_cursor_funcs, vmw_cursor_plane_formats, ARRAY_SIZE(vmw_cursor_plane_formats), - DRM_PLANE_TYPE_CURSOR, NULL); + NULL, DRM_PLANE_TYPE_CURSOR, NULL); if (ret) { DRM_ERROR("Failed to initialize cursor plane"); drm_plane_cleanup(&stdu->base.primary); diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c index 4a6252720c10..18e763493264 100644 --- a/drivers/gpu/drm/zte/zx_plane.c +++ b/drivers/gpu/drm/zte/zx_plane.c @@ -540,7 +540,7 @@ int zx_plane_init(struct drm_device *drm, struct zx_plane *zplane, ret = drm_universal_plane_init(drm, plane, VOU_CRTC_MASK, &zx_plane_funcs, formats, format_count, - type, 
NULL); + NULL, type, NULL); if (ret) { DRM_DEV_ERROR(dev, "failed to init universal plane: %d\n", ret); return ret; diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 9ab3e7044812..9d2cc3b11ae7 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -392,6 +392,22 @@ struct drm_plane_funcs { */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_plane_state *state); + + /** + * @format_mod_supported: + * + * This optional hook is used for the DRM to determine if the given + * format/modifier combination is valid for the plane. This allows the + * DRM to generate the correct format bitmask (which formats apply to + * which modifier). + * + * Returns: + * + * True if the given modifier is valid for that format on the plane. + * False otherwise. + */ + bool (*format_mod_supported)(struct drm_plane *plane, uint32_t format, + uint64_t modifier); }; /** @@ -487,6 +503,9 @@ struct drm_plane { unsigned int format_count; bool format_default; + uint64_t *modifiers; + unsigned int modifier_count; + struct drm_crtc *crtc; struct drm_framebuffer *fb; @@ -527,13 +546,14 @@ struct drm_plane { #define obj_to_plane(x) container_of(x, struct drm_plane, base) -__printf(8, 9) +__printf(9, 10) int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, + const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...); int drm_plane_init(struct drm_device *dev, diff --git a/include/drm/drm_simple_kms_helper.h b/include/drm/drm_simple_kms_helper.h index 2d36538e4a17..6d9adbb46293 100644 --- a/include/drm/drm_simple_kms_helper.h +++ b/include/drm/drm_simple_kms_helper.h @@ -122,6 +122,7 @@ int drm_simple_display_pipe_init(struct drm_device *dev, struct drm_simple_display_pipe *pipe, const struct drm_simple_display_pipe_funcs *funcs, const uint32_t *formats, unsigned int format_count, + const 
uint64_t *format_modifiers, struct drm_connector *connector); #endif /* __LINUX_DRM_SIMPLE_KMS_HELPER_H */ diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 7586c46f68bf..76c9101a7fc6 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -185,6 +185,8 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 /* add more to the end as needed */ +#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + #define fourcc_mod_code(vendor, val) \ ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL)) @@ -196,6 +198,15 @@ extern "C" { * authoritative source for all of these. */ +/* + * Invalid Modifier + * + * This modifier can be used as a sentinel to terminate the format modifiers + * list, or to initialize a variable with an invalid modifier. It might also be + * used to report an error back to userspace for certain APIs. + */ +#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED) + /* * Linear Layout * -- cgit v1.2.3 From db1689aa61bd1efb5ce9b896e7aa860a85b7f1b6 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jul 2017 20:46:39 -0700 Subject: drm: Create a format/modifier blob Updated blob layout (Rob, Daniel, Kristian, xerpi) v2: * Removed __packed, and alignment (.+) * Fix indent in drm_format_modifier fields (Liviu) * Remove duplicated modifier > 64 check (Liviu) * Change comment about modifier (Liviu) * Remove arguments to blob creation, use plane instead (Liviu) * Fix data types (Ben) * Make the blob part of uapi (Daniel) v3: Remove unused ret field. Change i, and j to unsigned int (Emil) v4: Use plane->modifier_count instead of recounting (Daniel) v5: Rename modifiers to modifiers_property (Ville) Use sizeof(__u32) instead to reflect UAPI nature (Ville) Make BUILD_BUG_ON for blob header size Cc: Rob Clark Cc: Kristian H. 
Kristensen Signed-off-by: Ben Widawsky Reviewed-by: Daniel Stone (v2) Reviewed-by: Liviu Dudau (v2) Reviewed-by: Emil Velikov (v3) Signed-off-by: Daniel Stone Link: http://patchwork.freedesktop.org/patch/msgid/20170724034641.13369-2-ben@bwidawsk.net --- drivers/gpu/drm/drm_mode_config.c | 7 ++++ drivers/gpu/drm/drm_plane.c | 84 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_mode_config.h | 6 +++ include/uapi/drm/drm_mode.h | 50 +++++++++++++++++++++++ 4 files changed, 147 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index d9862259a2a7..74f6ff5df656 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -337,6 +337,13 @@ static int drm_mode_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.gamma_lut_size_property = prop; + prop = drm_property_create(dev, + DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_BLOB, + "IN_FORMATS", 0); + if (!prop) + return -ENOMEM; + dev->mode_config.modifiers_property = prop; + return 0; } diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index d3fc561d7b48..5c14beee52ff 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -62,6 +62,87 @@ static unsigned int drm_num_planes(struct drm_device *dev) return num; } +static inline u32 * +formats_ptr(struct drm_format_modifier_blob *blob) +{ + return (u32 *)(((char *)blob) + blob->formats_offset); +} + +static inline struct drm_format_modifier * +modifiers_ptr(struct drm_format_modifier_blob *blob) +{ + return (struct drm_format_modifier *)(((char *)blob) + blob->modifiers_offset); +} + +static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane) +{ + const struct drm_mode_config *config = &dev->mode_config; + struct drm_property_blob *blob; + struct drm_format_modifier *mod; + size_t blob_size, formats_size, modifiers_size; + struct drm_format_modifier_blob *blob_data; + unsigned 
int i, j; + + formats_size = sizeof(__u32) * plane->format_count; + if (WARN_ON(!formats_size)) { + /* 0 formats are never expected */ + return 0; + } + + modifiers_size = + sizeof(struct drm_format_modifier) * plane->modifier_count; + + blob_size = sizeof(struct drm_format_modifier_blob); + /* Modifiers offset is a pointer to a struct with a 64 bit field so it + * should be naturally aligned to 8B. + */ + BUILD_BUG_ON(sizeof(struct drm_format_modifier_blob) % 8); + blob_size += ALIGN(formats_size, 8); + blob_size += modifiers_size; + + blob = drm_property_create_blob(dev, blob_size, NULL); + if (IS_ERR(blob)) + return -1; + + blob_data = (struct drm_format_modifier_blob *)blob->data; + blob_data->version = FORMAT_BLOB_CURRENT; + blob_data->count_formats = plane->format_count; + blob_data->formats_offset = sizeof(struct drm_format_modifier_blob); + blob_data->count_modifiers = plane->modifier_count; + + blob_data->modifiers_offset = + ALIGN(blob_data->formats_offset + formats_size, 8); + + memcpy(formats_ptr(blob_data), plane->format_types, formats_size); + + /* If we can't determine support, just bail */ + if (!plane->funcs->format_mod_supported) + goto done; + + mod = modifiers_ptr(blob_data); + for (i = 0; i < plane->modifier_count; i++) { + for (j = 0; j < plane->format_count; j++) { + if (plane->funcs->format_mod_supported(plane, + plane->format_types[j], + plane->modifiers[i])) { + + mod->formats |= 1 << j; + } + } + + mod->modifier = plane->modifiers[i]; + mod->offset = 0; + mod->pad = 0; + mod++; + } + +done: + drm_object_attach_property(&plane->base, config->modifiers_property, + blob->base.id); + + return 0; +} + /** * drm_universal_plane_init - Initialize a new universal plane object * @dev: DRM device @@ -181,6 +262,9 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, drm_object_attach_property(&plane->base, config->prop_src_h, 0); } + if (config->allow_fb_modifiers) + create_in_format_blob(dev, plane); + return 0; } 
EXPORT_SYMBOL(drm_universal_plane_init); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 42981711189b..1b37368416c8 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -757,6 +757,12 @@ struct drm_mode_config { */ bool allow_fb_modifiers; + /** + * @modifiers: Plane property to list support modifier/format + * combination. + */ + struct drm_property *modifiers_property; + /* cursor size */ uint32_t cursor_width, cursor_height; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 403339f98a92..a2bb7161f020 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -712,6 +712,56 @@ struct drm_mode_atomic { __u64 user_data; }; +struct drm_format_modifier_blob { +#define FORMAT_BLOB_CURRENT 1 + /* Version of this blob format */ + u32 version; + + /* Flags */ + u32 flags; + + /* Number of fourcc formats supported */ + u32 count_formats; + + /* Where in this blob the formats exist (in bytes) */ + u32 formats_offset; + + /* Number of drm_format_modifiers */ + u32 count_modifiers; + + /* Where in this blob the modifiers exist (in bytes) */ + u32 modifiers_offset; + + /* u32 formats[] */ + /* struct drm_format_modifier modifiers[] */ +}; + +struct drm_format_modifier { + /* Bitmask of formats in get_plane format list this info applies to. The + * offset allows a sliding window of which 64 formats (bits). + * + * Some examples: + * In today's world with < 65 formats, and formats 0, and 2 are + * supported + * 0x0000000000000005 + * ^-offset = 0, formats = 5 + * + * If the number formats grew to 128, and formats 98-102 are + * supported with the modifier: + * + * 0x0000003c00000000 0000000000000000 + * ^ + * |__offset = 64, formats = 0x3c00000000 + * + */ + __u64 formats; + __u32 offset; + __u32 pad; + + /* The modifier that applies to the >get_plane format list bitmask. 
__u64 modifier; +}; + /** * Create a new 'blob' data property, copying length bytes from data pointer, * and returning new blob ID. -- cgit v1.2.3 From f89823c212246d0671cc51e69894a3df1a743aee Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Aug 2017 18:05:50 +0100 Subject: drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is to expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwinding when opening perf stream Use kmalloc_array() to store registers Use uuid_is_valid() to validate config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove a non-existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather than using a pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointer fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other parts of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Andrzej Datczuk Reviewed-by: Andrzej Datczuk
Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com --- Documentation/gpu/i915.rst | 4 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 47 ++++ drivers/gpu/drm/i915/i915_perf.c | 471 +++++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 70 +++++- include/uapi/drm/i915_drm.h | 20 ++ 6 files changed, 597 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 9c7ed3e3f1e9..46875c2bcc31 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -417,6 +417,10 @@ integrate with drm/i915 and to handle the `DRM_I915_PERF_OPEN` ioctl. :functions: i915_perf_open_ioctl .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c :functions: i915_perf_release +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c + :functions: i915_perf_add_config_ioctl +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c + :functions: i915_perf_remove_config_ioctl i915 Perf Stream ---------------- diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 214555e813f1..cc25115c2db7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2729,6 +2729,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 32749425d7bf..39ac2dd49ac9 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1935,6 +1935,8 @@ struct i915_oa_config { struct attribute_group sysfs_metric; struct attribute *attrs[2]; struct device_attribute sysfs_metric_id; + + atomic_t ref_count; }; struct i915_perf_stream; @@ -2060,6 +2062,25 @@ struct i915_perf_stream { * struct i915_oa_ops - Gen specific implementation of an OA unit stream */ struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, + u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. + */ + bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); + /** * @init_oa_buffer: Resets the head and tail pointers of the * circular buffer for periodic OA reports. @@ -2444,10 +2465,32 @@ struct drm_i915_private { struct kobject *metrics_kobj; struct ctl_table_header *sysctl_header; + /* + * Lock associated with adding/modifying/removing OA configs + * in dev_priv->perf.metrics_idr. + */ + struct mutex metrics_lock; + + /* + * List of dynamic configurations, you need to hold + * dev_priv->perf.metrics_lock to access it. + */ + struct idr metrics_idr; + + /* + * Lock associated with anything below within this structure + * except exclusive_stream. + */ struct mutex lock; struct list_head streams; struct { + /* + * The stream currently using the OA unit. If accessed + * outside a syscall associated to its file + * descriptor, you need to hold + * dev_priv->drm.struct_mutex. 
+ */ struct i915_perf_stream *exclusive_stream; u32 specific_ctx_id; @@ -3637,6 +3680,10 @@ i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, uint32_t *reg_state); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 06a5e1e83e6c..221a996f1985 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -193,6 +193,7 @@ #include #include +#include #include "i915_drv.h" #include "i915_oa_hsw.h" @@ -357,6 +358,54 @@ struct perf_open_properties { int oa_period_exponent; }; +static void free_oa_config(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + if (!PTR_ERR(oa_config->flex_regs)) + kfree(oa_config->flex_regs); + if (!PTR_ERR(oa_config->b_counter_regs)) + kfree(oa_config->b_counter_regs); + if (!PTR_ERR(oa_config->mux_regs)) + kfree(oa_config->mux_regs); + kfree(oa_config); +} + +static void put_oa_config(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + if (!atomic_dec_and_test(&oa_config->ref_count)) + return; + + free_oa_config(dev_priv, oa_config); +} + +static int get_oa_config(struct drm_i915_private *dev_priv, + int metrics_set, + struct i915_oa_config **out_config) +{ + int ret; + + if (metrics_set == 1) { + *out_config = &dev_priv->perf.oa.test_config; + atomic_inc(&dev_priv->perf.oa.test_config.ref_count); + return 0; + } + + ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (ret) + return ret; + + *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); + if (!*out_config) + ret = -EINVAL; + else + atomic_inc(&(*out_config)->ref_count); + + 
mutex_unlock(&dev_priv->perf.metrics_lock); + + return ret; +} + static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) { return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; @@ -1246,8 +1295,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); /* - * Unset exclusive_stream first, it might be checked while - * disabling the metric set on gen8+. + * Unset exclusive_stream first, it will be checked while disabling + * the metric set on gen8+. */ mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.exclusive_stream = NULL; @@ -1263,6 +1312,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); + put_oa_config(dev_priv, stream->oa_config); + if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", dev_priv->perf.oa.spurious_report_rs.missed); @@ -1950,15 +2001,6 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { .read = i915_oa_read, }; -static struct i915_oa_config *get_oa_config(struct drm_i915_private *dev_priv, - int metrics_set) -{ - if (metrics_set == 1) - return &dev_priv->perf.oa.test_config; - - return NULL; -} - /** * i915_oa_stream_init - validate combined props for OA stream and init * @stream: An i915 perf stream @@ -2062,9 +2104,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return ret; } - stream->oa_config = get_oa_config(dev_priv, props->metrics_set); - if (!stream->oa_config) - return -EINVAL; + ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + if (ret) + goto err_config; /* PRM - observability performance counters: * @@ -2112,8 +2154,12 @@ err_enable: free_oa_buffer(dev_priv); err_oa_buf_alloc: + put_oa_config(dev_priv, stream->oa_config); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_runtime_pm_put(dev_priv); + +err_config: if (stream->ctx) 
oa_put_render_ctx_id(stream); @@ -2127,6 +2173,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (engine->id != RCS) return; @@ -2894,6 +2942,9 @@ void i915_perf_register(struct drm_i915_private *dev_priv) &dev_priv->perf.oa.test_config.sysfs_metric); if (ret) goto sysfs_error; + + atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + goto exit; sysfs_error: @@ -2925,6 +2976,367 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv) dev_priv->perf.metrics_kobj = NULL; } +static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + static const i915_reg_t flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].reg == addr) + return true; + } + return false; +} + +static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) || + (addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) || + (addr >= OACEC0_0.reg && addr <= OACEC7_1.reg); +} + +static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return addr == HALF_SLICE_CHICKEN2.reg || + (addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) || + (addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) || + (addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg); +} + +static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen7_is_valid_mux_addr(dev_priv, addr) || + addr == WAIT_FOR_RC6_EXIT.reg || + (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); +} + +static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return 
gen7_is_valid_mux_addr(dev_priv, addr) || + (addr >= 0x25100 && addr <= 0x2FF90) || + addr == 0x9ec0; +} + +static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen7_is_valid_mux_addr(dev_priv, addr) || + (addr >= 0x182300 && addr <= 0x1823A4); +} + +static uint32_t mask_reg_value(u32 reg, u32 val) +{ + /* HALF_SLICE_CHICKEN2 is programmed with a the + * WaDisableSTUnitPowerOptimization workaround. Make sure the value + * programmed by userspace doesn't change this. + */ + if (HALF_SLICE_CHICKEN2.reg == reg) + val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); + + /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function + * indicated by its name and a bunch of selection fields used by OA + * configs. + */ + if (WAIT_FOR_RC6_EXIT.reg == reg) + val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); + + return val; +} + +static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, + bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), + u32 __user *regs, + u32 n_regs) +{ + struct i915_oa_reg *oa_regs; + int err; + u32 i; + + if (!n_regs) + return NULL; + + if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2)) + return ERR_PTR(-EFAULT); + + /* No is_valid function means we're not allowing any register to be programmed. 
*/ + GEM_BUG_ON(!is_valid); + if (!is_valid) + return ERR_PTR(-EINVAL); + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(dev_priv, addr)) { + DRM_DEBUG("Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = _MMIO(addr); + oa_regs[i].value = mask_reg_value(addr, value); + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i915_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sprintf(buf, "%d\n", oa_config->id); +} + +static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = S_IRUGO; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(dev_priv->perf.metrics_kobj, + &oa_config->sysfs_metric); +} + +/** + * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config + * @dev: drm device + * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from + * userspace (unvalidated) + * @file: drm file + * + * Validates the submitted OA register to be saved into a new OA config that + * can then be used for programming the OA unit and its NOA network. 
+ * + * Returns: A new allocated config number to be used with the perf open ioctl + * or a negative error code on failure. + */ +int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_perf_oa_config *args = data; + struct i915_oa_config *oa_config, *tmp; + int err, id; + + if (!dev_priv->perf.initialized) { + DRM_DEBUG("i915 perf interface not available for this system\n"); + return -ENOTSUPP; + } + + if (!dev_priv->perf.metrics_kobj) { + DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); + return -EINVAL; + } + + if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { + DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); + return -EACCES; + } + + if ((!args->mux_regs_ptr || !args->n_mux_regs) && + (!args->boolean_regs_ptr || !args->n_boolean_regs) && + (!args->flex_regs_ptr || !args->n_flex_regs)) { + DRM_DEBUG("No OA registers given\n"); + return -EINVAL; + } + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) { + DRM_DEBUG("Failed to allocate memory for the OA config\n"); + return -ENOMEM; + } + + atomic_set(&oa_config->ref_count, 1); + + if (!uuid_is_valid(args->uuid)) { + DRM_DEBUG("Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is + * kzalloc. 
+ */ + memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); + + oa_config->mux_regs_len = args->n_mux_regs; + oa_config->mux_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); + + if (IS_ERR(oa_config->mux_regs)) { + DRM_DEBUG("Failed to create OA config for mux_regs\n"); + err = PTR_ERR(oa_config->mux_regs); + goto reg_err; + } + + oa_config->b_counter_regs_len = args->n_boolean_regs; + oa_config->b_counter_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); + + if (IS_ERR(oa_config->b_counter_regs)) { + DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); + err = PTR_ERR(oa_config->b_counter_regs); + goto reg_err; + } + + if (INTEL_GEN(dev_priv) < 8) { + if (args->n_flex_regs != 0) { + err = -EINVAL; + goto reg_err; + } + } else { + oa_config->flex_regs_len = args->n_flex_regs; + oa_config->flex_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); + + if (IS_ERR(oa_config->flex_regs)) { + DRM_DEBUG("Failed to create OA config for flex_regs\n"); + err = PTR_ERR(oa_config->flex_regs); + goto reg_err; + } + } + + err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be + * too costly. + */ + idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + DRM_DEBUG("OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + if (err) { + DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + /* Config id 0 is invalid, id 1 for kernel stored test config. 
*/ + oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config, 2, + 0, GFP_KERNEL); + if (oa_config->id < 0) { + DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&dev_priv->perf.metrics_lock); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&dev_priv->perf.metrics_lock); +reg_err: + put_oa_config(dev_priv, oa_config); + DRM_DEBUG("Failed to add new OA config\n"); + return err; +} + +/** + * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config + * @dev: drm device + * @data: ioctl data (pointer to u64 integer) copied from userspace + * @file: drm file + * + * Configs can be removed while being used, the will stop appearing in sysfs + * and their content will be freed when the stream using the config is closed. + * + * Returns: 0 on success or a negative error code on failure. + */ +int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u64 *arg = data; + struct i915_oa_config *oa_config; + int ret; + + if (!dev_priv->perf.initialized) { + DRM_DEBUG("i915 perf interface not available for this system\n"); + return -ENOTSUPP; + } + + if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { + DRM_DEBUG("Insufficient privileges to remove i915 OA config\n"); + return -EACCES; + } + + ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (ret) + goto lock_err; + + oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + if (!oa_config) { + DRM_DEBUG("Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto config_err; + } + + GEM_BUG_ON(*arg != oa_config->id); + + sysfs_remove_group(dev_priv->perf.metrics_kobj, + &oa_config->sysfs_metric); + + idr_remove(&dev_priv->perf.metrics_idr, *arg); + put_oa_config(dev_priv, oa_config); + +config_err: + mutex_unlock(&dev_priv->perf.metrics_lock); +lock_err: + return ret; +} + static struct 
ctl_table oa_table[] = { { .procname = "perf_stream_paranoid", @@ -2981,6 +3393,11 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency = 0; if (IS_HASWELL(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + hsw_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; @@ -3000,6 +3417,12 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen8_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; @@ -3018,6 +3441,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency = 12500000; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_mux_reg = + chv_is_valid_mux_addr; + } } else if (IS_GEN9(dev_priv)) { dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; @@ -3056,10 +3483,23 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency / 2; dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + mutex_init(&dev_priv->perf.metrics_lock); + idr_init(&dev_priv->perf.metrics_idr); + dev_priv->perf.initialized = true; } } +static int destroy_config(int id, void *p, void *data) +{ + struct drm_i915_private *dev_priv = data; + struct i915_oa_config *oa_config = p; + + 
put_oa_config(dev_priv, oa_config); + + return 0; +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @dev_priv: i915 device instance @@ -3069,6 +3509,9 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) if (!dev_priv->perf.initialized) return; + idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); + idr_destroy(&dev_priv->perf.metrics_idr); + unregister_sysctl_table(dev_priv->perf.sysctl_header); memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0a42b6071ea1..b2546ade2c45 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -229,6 +229,28 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_RPCS_EU_MIN_SHIFT 0 #define GEN8_RPCS_EU_MIN_MASK (0xf << GEN8_RPCS_EU_MIN_SHIFT) +#define WAIT_FOR_RC6_EXIT _MMIO(0x20CC) +/* HSW only */ +#define HSW_SELECTIVE_READ_ADDRESSING_SHIFT 2 +#define HSW_SELECTIVE_READ_ADDRESSING_MASK (0x3 << HSW_SLECTIVE_READ_ADDRESSING_SHIFT) +#define HSW_SELECTIVE_WRITE_ADDRESS_SHIFT 4 +#define HSW_SELECTIVE_WRITE_ADDRESS_MASK (0x7 << HSW_SELECTIVE_WRITE_ADDRESS_SHIFT) +/* HSW+ */ +#define HSW_WAIT_FOR_RC6_EXIT_ENABLE (1 << 0) +#define HSW_RCS_CONTEXT_ENABLE (1 << 7) +#define HSW_RCS_INHIBIT (1 << 8) +/* Gen8 */ +#define GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT 4 +#define GEN8_SELECTIVE_WRITE_ADDRESS_MASK (0x3 << GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT) +#define GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT 4 +#define GEN8_SELECTIVE_WRITE_ADDRESS_MASK (0x3 << GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT) +#define GEN8_SELECTIVE_WRITE_ADDRESSING_ENABLE (1 << 6) +#define GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT 9 +#define GEN8_SELECTIVE_READ_SUBSLICE_SELECT_MASK (0x3 << GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT) +#define GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT 11 +#define GEN8_SELECTIVE_READ_SLICE_SELECT_MASK (0x3 << GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT) +#define 
GEN8_SELECTIVE_READ_ADDRESSING_ENABLE (1 << 13) + #define GAM_ECOCHK _MMIO(0x4090) #define BDW_DISABLE_HDC_INVALIDATION (1<<25) #define ECOCHK_SNB_BIT (1<<10) @@ -729,9 +751,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define EU_PERF_CNTL5 _MMIO(0xe55c) #define EU_PERF_CNTL6 _MMIO(0xe65c) -#define GDT_CHICKEN_BITS _MMIO(0x9840) -#define GT_NOA_ENABLE 0x00000080 - /* * OA Boolean state */ @@ -994,6 +1013,51 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) +/* OA perf counters */ +#define OA_PERFCNT1_LO _MMIO(0x91B8) +#define OA_PERFCNT1_HI _MMIO(0x91BC) +#define OA_PERFCNT2_LO _MMIO(0x91C0) +#define OA_PERFCNT2_HI _MMIO(0x91C4) + +#define OA_PERFMATRIX_LO _MMIO(0x91C8) +#define OA_PERFMATRIX_HI _MMIO(0x91CC) + +/* RPM unit config (Gen8+) */ +#define RPM_CONFIG0 _MMIO(0x0D00) +#define RPM_CONFIG1 _MMIO(0x0D04) + +/* RPC unit config (Gen8+) */ +#define RPM_CONFIG _MMIO(0x0D08) + +/* NOA (Gen8+) */ +#define NOA_CONFIG(i) _MMIO(0x0D0C + (i) * 4) + +#define MICRO_BP0_0 _MMIO(0x9800) +#define MICRO_BP0_2 _MMIO(0x9804) +#define MICRO_BP0_1 _MMIO(0x9808) + +#define MICRO_BP1_0 _MMIO(0x980C) +#define MICRO_BP1_2 _MMIO(0x9810) +#define MICRO_BP1_1 _MMIO(0x9814) + +#define MICRO_BP2_0 _MMIO(0x9818) +#define MICRO_BP2_2 _MMIO(0x981C) +#define MICRO_BP2_1 _MMIO(0x9820) + +#define MICRO_BP3_0 _MMIO(0x9824) +#define MICRO_BP3_2 _MMIO(0x9828) +#define MICRO_BP3_1 _MMIO(0x982C) + +#define MICRO_BP_TRIGGER _MMIO(0x9830) +#define MICRO_BP3_COUNT_STATUS01 _MMIO(0x9834) +#define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) +#define MICRO_BP_FIRED_ARMED _MMIO(0x983C) + +#define GDT_CHICKEN_BITS _MMIO(0x9840) +#define GT_NOA_ENABLE 0x00000080 + +#define NOA_DATA _MMIO(0x986C) +#define NOA_WRITE _MMIO(0x9888) #define _GEN7_PIPEA_DE_LOAD_SL 0x70068 #define _GEN7_PIPEB_DE_LOAD_SL 0x71068 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ccbd6a2bbe0..ce3833fa1e06 100644 --- 
a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1467,6 +1471,22 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; +/** + * Structure to upload perf dynamic configuration into the kernel. 
+ */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + __u64 __user mux_regs_ptr; + __u64 __user boolean_regs_ptr; + __u64 __user flex_regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From adb8a5a5eb9f16997f11ecacf25a647134011dd7 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 6 Aug 2017 18:44:23 +0200 Subject: uapi drm/armada_drm.h: use __u32 and __u64 instead of uint32_t and uint64_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are defined in linux/types.h or drm/drm.h. Fixes user space compilation errors like: drm/armada_drm.h:26:2: error: unknown type name ‘uint32_t’ uint32_t handle; ^~~~~~~~ Signed-off-by: Mikko Rapeli Cc: Emil Velikov Cc: Gabriel Laskar Cc: Russell King Cc: Rob Clark Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170806164428.2273-33-mikko.rapeli@iki.fi --- include/uapi/drm/armada_drm.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h index 72e326f9c7de..0cb932416cfe 100644 --- a/include/uapi/drm/armada_drm.h +++ b/include/uapi/drm/armada_drm.h @@ -23,27 +23,27 @@ extern "C" { DRM_##dir(DRM_COMMAND_BASE + DRM_ARMADA_##name, struct drm_armada_##str) struct drm_armada_gem_create { - uint32_t handle; - uint32_t size; + __u32 handle; + __u32 size; }; #define DRM_IOCTL_ARMADA_GEM_CREATE \ ARMADA_IOCTL(IOWR, GEM_CREATE, gem_create) struct drm_armada_gem_mmap { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t addr; + __u32 handle; + __u32 pad; + __u64 offset; + __u64 size; + __u64 addr; }; #define DRM_IOCTL_ARMADA_GEM_MMAP \ ARMADA_IOCTL(IOWR, GEM_MMAP, gem_mmap) struct drm_armada_gem_pwrite { - uint64_t ptr; - uint32_t handle; - uint32_t offset; - 
uint32_t size; + __u64 ptr; + __u32 handle; + __u32 offset; + __u32 size; }; #define DRM_IOCTL_ARMADA_GEM_PWRITE \ ARMADA_IOCTL(IOW, GEM_PWRITE, gem_pwrite) -- cgit v1.2.3 From 3be8eddd9d58a925b461b582fa5aa422a9c145ee Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 09:27:33 -0700 Subject: drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order. This is useful to allow GL to provide defined results for overlapping glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited window movement without first blitting to a temporary. x11perf -copywinwin100 goes from 1850/sec to 4850/sec. v2: Default to the same behavior as before when the flags aren't passed. (suggested by Boris) Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net Reviewed-by: Boris Brezillon --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 5 ++++- drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++----- include/uapi/drm/vc4_drm.h | 11 +++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index e8f0e1790d5e..1c96edcb302b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: + case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: args->value = true; break; default: diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 22a55cff7e64..d0c6bfb68c4e 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1007,7 +1007,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx acquire_ctx; int ret = 0; - if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { + if ((args->flags & 
~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | + VC4_SUBMIT_CL_FIXED_RCL_ORDER | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; } diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index e0539731130b..273984f71ae2 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, uint8_t max_y_tile = args->max_y_tile; uint8_t xtiles = max_x_tile - min_x_tile + 1; uint8_t ytiles = max_y_tile - min_y_tile + 1; - uint8_t x, y; + uint8_t xi, yi; uint32_t size, loop_body_size; + bool positive_x = true; + bool positive_y = true; + + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) { + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)) + positive_x = false; + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) + positive_y = false; + } size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; @@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, rcl_u16(setup, args->height); rcl_u16(setup, args->color_write.bits); - for (y = min_y_tile; y <= max_y_tile; y++) { - for (x = min_x_tile; x <= max_x_tile; x++) { - bool first = (x == min_x_tile && y == min_y_tile); - bool last = (x == max_x_tile && y == max_y_tile); + for (yi = 0; yi < ytiles; yi++) { + int y = positive_y ? min_y_tile + yi : max_y_tile - yi; + for (xi = 0; xi < xtiles; xi++) { + int x = positive_x ? 
min_x_tile + xi : max_x_tile - xi; + bool first = (xi == 0 && yi == 0); + bool last = (xi == xtiles - 1 && yi == ytiles - 1); emit_tile(exec, setup, x, y, first, last); } diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 551628e571f9..afae87004963 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -155,6 +155,16 @@ struct drm_vc4_submit_cl { __u32 pad:24; #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) +/* By default, the kernel gets to choose the order that the tiles are + * rendered in. If this is set, then the tiles will be rendered in a + * raster order, with the right-to-left vs left-to-right and + * top-to-bottom vs bottom-to-top dictated by + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping + * blits to be implemented using the 3D engine. + */ +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -294,6 +304,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 struct drm_vc4_get_param { __u32 param; -- cgit v1.2.3 From bbfb6ce86c9889a5d434e2e603d41e0ce5b552e2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 1 Aug 2017 09:58:12 -0700 Subject: drm/i915: Implement .get_format_info() hook for CCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involves telling the display engine the location of the color control surface (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. 
By providing our own format information for the CCS formats, we should be able to make framebuffer_check() do the right thing for the CCS surface as well. Note that we'll return the same format info for both Y and Yf tiled formats as that's what happens with the non-CCS Y vs. Yf as well. If desired, we could potentially return a unique pointer for each pixel_format+tiling+ccs combination, in which case we would immediately be able to tell if any of that stuff changed by just comparing the pointers. But that does sound a bit wasteful space-wise. v2: Drop the 'dev' argument from the hook v3: Include the description of the CCS surface layout v4: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason) v5: Re-drop 'dev', fix commit message, add missing drm_fourcc.h description of CCS layout. (daniels) Cc: Daniel Vetter Cc: Ben Widawsky Cc: Jason Ekstrand Acked-by: Jason Ekstrand Reviewed-by: Ben Widawsky (v3) Reviewed-by: Daniel Stone Signed-off-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Stone --- drivers/gpu/drm/i915/intel_display.c | 36 ++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm_fourcc.h | 20 ++++++++++++++++++++ 2 files changed, 56 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e4af56b5ff27..51035ec1fe62 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2428,6 +2428,41 @@ static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) } } +static const struct drm_format_info ccs_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 
}, .hsub = 8, .vsub = 16, }, +}; + +static const struct drm_format_info * +lookup_format_info(const struct drm_format_info formats[], + int num_formats, u32 format) +{ + int i; + + for (i = 0; i < num_formats; i++) { + if (formats[i].format == format) + return &formats[i]; + } + + return NULL; +} + +static const struct drm_format_info * +intel_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +{ + switch (cmd->modifier[0]) { + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + return lookup_format_info(ccs_formats, + ARRAY_SIZE(ccs_formats), + cmd->pixel_format); + default: + return NULL; + } +} + static int intel_fill_fb_info(struct drm_i915_private *dev_priv, struct drm_framebuffer *fb) @@ -13673,6 +13708,7 @@ static void intel_atomic_state_free(struct drm_atomic_state *state) static const struct drm_mode_config_funcs intel_mode_funcs = { .fb_create = intel_user_framebuffer_create, + .get_format_info = intel_get_format_info, .output_poll_changed = intel_fbdev_output_poll_changed, .atomic_check = intel_atomic_check, .atomic_commit = intel_atomic_commit, diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 76c9101a7fc6..3ad838d3f93f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -263,6 +263,26 @@ extern "C" { */ #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) +/* + * Intel color control surface (CCS) for render compression + * + * The framebuffer format must be one of the 8:8:8:8 RGB formats. + * The main surface will be plane index 0 and must be Y/Yf-tiled, + * the CCS will be plane index 1. + * + * Each CCS tile matches a 1024x512 pixel area of the main surface. + * To match certain aspects of the 3D hardware the CCS is + * considered to be made up of normal 128Bx32 Y tiles, Thus + * the CCS pitch must be specified in multiples of 128 bytes. 
+ * + * In reality the CCS tile appears to be a 64Bx64 Y tile, composed + * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks. + * But that fact is not relevant unless the memory is accessed + * directly. + */ +#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4) +#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.2.3 From cf6e7bac6357f0ccca51fcb5eb325e724f6b4c95 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Aug 2017 15:57:33 +0100 Subject: drm/i915: Add support for drm syncobjs This commit adds support for waiting on or signaling DRM syncobjs as part of execbuf. It does so by hijacking the currently unused cliprects pointer to instead point to an array of drm_i915_gem_exec_fence structs which contain a DRM syncobj and a flags parameter which specifies whether to wait on it or to signal it. This implementation theoretically allows for both flags to be set in which case it waits on the dma_fence that was in the syncobj and then immediately replaces it with the dma_fence from the current execbuf. 
v2: - Rebase on new syncobj API v3: - Pull everything out into helpers - Do all allocation in gem_execbuffer2 - Pack the flags in the bottom 2 bits of the drm_syncobj* v4: - Prevent a potential race on syncobj->fence Testcase: igt/gem_exec_fence/syncobj* Signed-off-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/1499289202-25441-1-git-send-email-jason.ekstrand@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170815145733.4562-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 152 ++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 31 +++++- 3 files changed, 178 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 25de4a95526b..43100229613c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -388,6 +388,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_FENCE: case I915_PARAM_HAS_EXEC_CAPTURE: case I915_PARAM_HAS_EXEC_BATCH_FIRST: + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2739,7 +2740,7 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 99520b2fac7c..8e8bc7aefd9c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -32,6 +32,7 @@ #include #include 
+#include #include #include "i915_drv.h" @@ -1896,8 +1897,10 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return false; /* Kernel clipping was a DRI1 misfeature */ - if (exec->num_cliprects || exec->cliprects_ptr) - return false; + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (exec->num_cliprects || exec->cliprects_ptr) + return false; + } if (exec->DR4 == 0xffffffff) { DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); @@ -2128,11 +2131,131 @@ eb_select_engine(struct drm_i915_private *dev_priv, return engine; } +static void +__free_fence_array(struct drm_syncobj **fences, unsigned int n) +{ + while (n--) + drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + kvfree(fences); +} + +static struct drm_syncobj ** +get_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct drm_file *file) +{ + const unsigned int nfences = args->num_cliprects; + struct drm_i915_gem_exec_fence __user *user; + struct drm_syncobj **fences; + unsigned int n; + int err; + + if (!(args->flags & I915_EXEC_FENCE_ARRAY)) + return NULL; + + if (nfences > SIZE_MAX / sizeof(*fences)) + return ERR_PTR(-EINVAL); + + user = u64_to_user_ptr(args->cliprects_ptr); + if (!access_ok(VERIFY_READ, user, nfences * 2 * sizeof(u32))) + return ERR_PTR(-EFAULT); + + fences = kvmalloc_array(args->num_cliprects, sizeof(*fences), + __GFP_NOWARN | GFP_TEMPORARY); + if (!fences) + return ERR_PTR(-ENOMEM); + + for (n = 0; n < nfences; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + + if (__copy_from_user(&fence, user++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + } + + return fences; + +err: + __free_fence_array(fences, n); + return ERR_PTR(err); +} + +static void +put_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct 
drm_syncobj **fences) +{ + if (fences) + __free_fence_array(fences, args->num_cliprects); +} + +static int +await_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + unsigned int n; + int err; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_WAIT)) + continue; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&syncobj->fence); + rcu_read_unlock(); + if (!fence) + return -EINVAL; + + err = i915_gem_request_await_dma_fence(eb->request, fence); + dma_fence_put(fence); + if (err < 0) + return err; + } + + return 0; +} + +static void +signal_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + struct dma_fence * const fence = &eb->request->fence; + unsigned int n; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_SIGNAL)) + continue; + + drm_syncobj_replace_fence(syncobj, fence); + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec) + struct drm_i915_gem_exec_object2 *exec, + struct drm_syncobj **fences) { struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; @@ -2318,6 +2441,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_request; } + if (fences) { + err = await_fence_array(&eb, fences); + if (err) + goto err_request; + } + if (out_fence_fd != -1) { out_fence = sync_file_create(&eb.request->fence); if (!out_fence) { @@ -2341,6 +2470,9 @@ err_request: __i915_add_request(eb.request, err == 0); add_to_client(eb.request, file); + if (fences) + signal_fence_array(&eb, fences); + if (out_fence) { if (err == 
0) { fd_install(out_fence_fd, out_fence->file); @@ -2442,7 +2574,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -2474,6 +2606,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, const size_t sz = sizeof(struct drm_i915_gem_exec_object2); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; + struct drm_syncobj **fences = NULL; int err; if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { @@ -2500,7 +2633,15 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - err = i915_gem_do_execbuffer(dev, file, args, exec2_list); + if (args->flags & I915_EXEC_FENCE_ARRAY) { + fences = get_fence_array(args, file); + if (IS_ERR(fences)) { + kvfree(exec2_list); + return PTR_ERR(fences); + } + } + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2530,6 +2671,7 @@ end_user: } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; + put_fence_array(args, fences); kvfree(exec2_list); return err; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ce3833fa1e06..6598fb76d2c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -435,6 +435,11 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. 
+ */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + typedef struct drm_i915_getparam { __s32 param; /* @@ -816,6 +821,17 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -830,7 +846,11 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -931,7 +951,14 @@ struct drm_i915_gem_execbuffer2 { * element). */ #define I915_EXEC_BATCH_FIRST (1<<18) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of drm_i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. 
+ */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Tue, 15 Aug 2017 23:00:20 -0400 Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2 v2: * Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA * Removed size parameter from the ioctl, it was unused * Removed hole in ioctl number space * No more call to write_config_static_mem * Return correct error code from ioctl Signed-off-by: Moses Reuben Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 ++++++++++++++++++++++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 ++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + include/uapi/linux/kfd_ioctl.h | 11 ++++++- 6 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65b506f19b46..7436d34b77ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -848,6 +848,40 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, return err; } +static int kfd_ioctl_set_scratch_backing_va(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_scratch_backing_va_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + long err; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_fail; + } + + 
pdd->qpd.sh_hidden_private_base = args->va_addr; + + mutex_unlock(&p->mutex); + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + dev->kfd2kgd->set_scratch_backing_va( + dev->kgd, args->va_addr, pdd->qpd.vmid); + + return 0; + +bind_process_to_device_fail: + mutex_unlock(&p->mutex); + return err; +} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ @@ -902,6 +936,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, kfd_ioctl_dbg_wave_control, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, + kfd_ioctl_set_scratch_backing_va, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 618ac65b6136..53a66e821624 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -270,6 +270,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); + dqm->dev->kfd2kgd->set_scratch_backing_va( + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index fadc56a8be71..72c3cbabc0a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -24,6 +24,7 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" #include "oss/oss_2_4_sh_mask.h" +#include "gca/gfx_7_2_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -123,6 +124,7 @@ static int 
register_process_cik(struct device_queue_manager *dqm, } else { temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 15e81ae9d2f4..40e9ddd096cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -135,6 +135,8 @@ static int register_process_vi(struct device_queue_manager *dqm, qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + qpd->sh_mem_config |= 1 << + SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 30ce92c6e6a1..b397ec726400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -432,6 +432,7 @@ struct qcm_process_device { uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; + uint32_t sh_hidden_private_base; }; /* Data that is per-process-per device. 
*/ diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6833426fdef..1b9c5609d523 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args { uint32_t wait_result; /* from KFD */ }; +struct kfd_ioctl_set_scratch_backing_va_args { + uint64_t va_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x11 +#define AMDKFD_COMMAND_END 0x12 #endif -- cgit v1.2.3 From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Aug 2017 23:00:22 -0400 Subject: drm/amdkfd: Implement image tiling mode support v2 v2: Removed hole in ioctl number space Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 28 ++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7436d34b77ab..e4a8c2e52cb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -883,6 +883,46 @@ bind_process_to_device_fail: return err; } +static int kfd_ioctl_get_tile_config(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_tile_config_args *args = data; + struct kfd_dev *dev; + struct tile_config 
config; + int err = 0; + + dev = kfd_device_by_id(args->gpu_id); + + dev->kfd2kgd->get_tile_config(dev->kgd, &config); + + args->gb_addr_config = config.gb_addr_config; + args->num_banks = config.num_banks; + args->num_ranks = config.num_ranks; + + if (args->num_tile_configs > config.num_tile_configs) + args->num_tile_configs = config.num_tile_configs; + err = copy_to_user((void __user *)args->tile_config_ptr, + config.tile_config_ptr, + args->num_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_tile_configs = 0; + return -EFAULT; + } + + if (args->num_macro_tile_configs > config.num_macro_tile_configs) + args->num_macro_tile_configs = + config.num_macro_tile_configs; + err = copy_to_user((void __user *)args->macro_tile_config_ptr, + config.macro_tile_config_ptr, + args->num_macro_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_macro_tile_configs = 0; + return -EFAULT; + } + + return 0; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -939,6 +979,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, kfd_ioctl_set_scratch_backing_va, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, + kfd_ioctl_get_tile_config, 0) }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1b9c5609d523..7b4567bacfc2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args { uint32_t pad; }; +struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ + uint64_t tile_config_ptr; + /* to KFD: pointer to macro tile array */ + uint64_t macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_tile_configs; + /* to KFD: array size 
allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_macro_tile_configs; + + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args { #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x12 +#define AMDKFD_COMMAND_END 0x13 #endif -- cgit v1.2.3 From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2017 16:08:14 +0100 Subject: drm: rename u32 in __u32 in uapi All other fields use __ Cc: Ben Widawsky Fixes: db1689aa61b ("drm: Create a format/modifier blob") Signed-off-by: Lionel Landwerlin Signed-off-by: Daniel Stone Reviewed-by: Chris Wilson Reviewed-by: Emil Velikov Reviewed-by: Ben Widawsky Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com --- include/uapi/drm/drm_mode.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a2bb7161f020..54fc38c3c3f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -715,24 +715,24 @@ struct drm_mode_atomic { struct drm_format_modifier_blob { #define FORMAT_BLOB_CURRENT 1 /* Version of this blob format */ - u32 version; + __u32 version; /* Flags */ - u32 flags; + __u32 flags; /* Number of fourcc formats supported 
*/ - u32 count_formats; + __u32 count_formats; /* Where in this blob the formats exist (in bytes) */ - u32 formats_offset; + __u32 formats_offset; /* Number of drm_format_modifiers */ - u32 count_modifiers; + __u32 count_modifiers; /* Where in this blob the modifiers exist (in bytes) */ - u32 modifiers_offset; + __u32 modifiers_offset; - /* u32 formats[] */ + /* __u32 formats[] */ /* struct drm_format_modifier modifiers[] */ }; -- cgit v1.2.3 From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 5 Jul 2017 01:37:55 -0700 Subject: drm/vmwgfx: Prepare to support fence fd Make the fields and flags available. Signed-off-by: Sinclair Yeh Reviewed-by: Deepak Singh Rawat Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 5 ----- include/uapi/drm/vmwgfx_drm.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 8c583fd16c79..178dabcdb198 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4449,11 +4449,6 @@ int vmw_execbuf_ioctl(struct drm_device *dev, unsigned long data, arg.context_handle = (uint32_t) -1; break; case 2: - if (arg.pad64 != 0) { - DRM_ERROR("Unused IOCTL data not set to zero.\n"); - return -EINVAL; - } - break; default: break; } diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d9dfde9aa757..0bc784f5e0db 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. 
+ * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -345,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; -- cgit v1.2.3 From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 25 Aug 2017 10:52:22 -0700 Subject: drm/syncobj: add sync obj wait interface. (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. 
(-ETIME is suggested by ickle, I can feel a shed painting) v7: talked to Daniel/Arnd, use ktime and ns everywhere. v8: be more careful in the timeout calculations use uint32_t for counter variables so we don't overflow graciously handle -ENOENT being returned from dma_fence_wait_timeout Signed-off-by: Dave Airlie Reviewed-by: Jason Ekstrand Acked-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 142 +++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 12 ++++ 4 files changed, 158 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 4e906b82a170..534e5ac43bf8 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -167,3 +167,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index d920b2118a39..b4f443417a28 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index eea38d82645c..4e8563c36d6e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1,5 +1,7 @@ /* * Copyright 2017 Red Hat + * Parts ported from amdgpu (fence wait code). 
+ * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +33,9 @@ * that contain an optional fence. The fence can be updated with a new * fence, or be NULL. * + * syncobj's can be waited upon, where it will wait for the underlying + * fence. + * * syncobj's can be export to fd's and back, these fd's are opaque and * have no other use case, except passing the syncobj between processes. * @@ -447,3 +452,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, return drm_syncobj_fd_to_handle(file_private, args->fd, &args->handle); } + +/** + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value + * + * @timeout_nsec: timeout nsec component in ns, 0 for poll + * + * Calculate the timeout in jiffies from an absolute time in sec/nsec. + */ +static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) +{ + ktime_t abs_timeout, now; + u64 timeout_ns, timeout_jiffies64; + + /* make 0 timeout means poll - absolute 0 doesn't seem valid */ + if (timeout_nsec == 0) + return 0; + + abs_timeout = ns_to_ktime(timeout_nsec); + now = ktime_get(); + + if (!ktime_after(abs_timeout, now)) + return 0; + + timeout_ns = ktime_to_ns(ktime_sub(abs_timeout, now)); + + timeout_jiffies64 = nsecs_to_jiffies64(timeout_ns); + /* clamp timeout to avoid infinite timeout */ + if (timeout_jiffies64 >= MAX_SCHEDULE_TIMEOUT - 1) + return MAX_SCHEDULE_TIMEOUT - 1; + + return timeout_jiffies64 + 1; +} + +static int drm_syncobj_wait_fences(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct dma_fence **fences) +{ + signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); + signed long ret = 0; + uint32_t first = ~0; + + if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + uint32_t i; + for (i = 0; i < wait->count_handles; i++) { + ret = 
dma_fence_wait_timeout(fences[i], true, timeout); + + /* Various dma_fence wait callbacks will return + * ENOENT to indicate that the fence has already + * been signaled. We need to sanitize this to 0 so + * we don't return early and the client doesn't see + * an unexpected error. + */ + if (ret == -ENOENT) + ret = 0; + + if (ret < 0) + return ret; + if (ret == 0) + break; + timeout = ret; + } + first = 0; + } else { + ret = dma_fence_wait_any_timeout(fences, + wait->count_handles, + true, timeout, + &first); + } + + if (ret < 0) + return ret; + + wait->first_signaled = first; + if (ret == 0) + return -ETIME; + return 0; +} + +int +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_wait *args = data; + uint32_t *handles; + struct dma_fence **fences; + int ret = 0; + uint32_t i; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + /* Get the handles from userspace */ + handles = kmalloc_array(args->count_handles, sizeof(uint32_t), + GFP_KERNEL); + if (handles == NULL) + return -ENOMEM; + + if (copy_from_user(handles, + u64_to_user_ptr(args->handles), + sizeof(uint32_t) * args->count_handles)) { + ret = -EFAULT; + goto err_free_handles; + } + + fences = kcalloc(args->count_handles, + sizeof(struct dma_fence *), GFP_KERNEL); + if (!fences) { + ret = -ENOMEM; + goto err_free_handles; + } + + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_find_fence(file_private, handles[i], + &fences[i]); + if (ret) + goto err_free_fence_array; + } + + ret = drm_syncobj_wait_fences(dev, file_private, + args, fences); + +err_free_fence_array: + for (i = 0; i < args->count_handles; i++) + dma_fence_put(fences[i]); + kfree(fences); +err_free_handles: + kfree(handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h 
b/include/uapi/drm/drm.h index 101593ab10ac..0757c1a41821 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,17 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +851,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:25 -0700 Subject: drm/syncobj: Add a CREATE_SIGNALED flag This requests that the driver create the sync object such that it already has a signaled dma_fence attached. Because we don't need anything in particular (just something signaled), we use a dummy null fence. This is useful for Vulkan which has a similar flag that can be passed to vkCreateFence. 
Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 57 ++++++++++++++++++++++++++++++++++++++++--- include/uapi/drm/drm.h | 1 + 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index bade497b3f1d..12db8c9564cd 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -154,6 +154,49 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); +struct drm_syncobj_null_fence { + struct dma_fence base; + spinlock_t lock; +}; + +static const char *drm_syncobj_null_fence_get_name(struct dma_fence *fence) +{ + return "syncobjnull"; +} + +static bool drm_syncobj_null_fence_enable_signaling(struct dma_fence *fence) +{ + dma_fence_enable_sw_signaling(fence); + return !dma_fence_is_signaled(fence); +} + +static const struct dma_fence_ops drm_syncobj_null_fence_ops = { + .get_driver_name = drm_syncobj_null_fence_get_name, + .get_timeline_name = drm_syncobj_null_fence_get_name, + .enable_signaling = drm_syncobj_null_fence_enable_signaling, + .wait = dma_fence_default_wait, + .release = NULL, +}; + +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +{ + struct drm_syncobj_null_fence *fence; + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return -ENOMEM; + + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &drm_syncobj_null_fence_ops, + &fence->lock, 0, 0); + dma_fence_signal(&fence->base); + + drm_syncobj_replace_fence(syncobj, &fence->base); + + dma_fence_put(&fence->base); + + return 0; +} + int drm_syncobj_find_fence(struct drm_file *file_private, u32 handle, struct dma_fence **fence) @@ -190,7 +233,7 @@ void drm_syncobj_free(struct kref *kref) EXPORT_SYMBOL(drm_syncobj_free); static int drm_syncobj_create(struct drm_file *file_private, - u32 *handle) + u32 *handle, uint32_t flags) { int ret; struct drm_syncobj 
*syncobj; @@ -203,6 +246,14 @@ static int drm_syncobj_create(struct drm_file *file_private, INIT_LIST_HEAD(&syncobj->cb_list); spin_lock_init(&syncobj->lock); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } + idr_preload(GFP_KERNEL); spin_lock(&file_private->syncobj_table_lock); ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT); @@ -438,11 +489,11 @@ drm_syncobj_create_ioctl(struct drm_device *dev, void *data, return -ENODEV; /* no valid flags yet */ - if (args->flags) + if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED) return -EINVAL; return drm_syncobj_create(file_private, - &args->handle); + &args->handle, args->flags); } int diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 0757c1a41821..ade7f68d32b5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; -- cgit v1.2.3 From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:24 -0700 Subject: drm/syncobj: Allow wait for submit and signal behavior (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. 
Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand Cc: Dave Airlie Cc: Chris Wilson Cc: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 252 ++++++++++++++++++++++++++++++++++-------- include/uapi/drm/drm.h | 1 + 2 files changed, 208 insertions(+), 45 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 12db8c9564cd..cccd3bd194c6 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "drm_internal.h" #include @@ -88,6 +89,35 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, list_add_tail(&cb->node, &syncobj->cb_list); } +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, + struct dma_fence **fence, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + int ret; + + *fence = drm_syncobj_fence_get(syncobj); + if (*fence) + return 1; + + spin_lock(&syncobj->lock); + /* We've already tried once to get a fence and failed. 
Now that we + * have the lock, try one more time just to be sure we don't add a + * callback when a fence has already been set. + */ + if (syncobj->fence) { + *fence = dma_fence_get(syncobj->fence); + ret = 1; + } else { + *fence = NULL; + drm_syncobj_add_callback_locked(syncobj, cb, func); + ret = 0; + } + spin_unlock(&syncobj->lock); + + return ret; +} + /** * drm_syncobj_add_callback - adds a callback to syncobj::cb_list * @syncobj: Sync object to which to add the callback @@ -560,6 +590,160 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } +struct syncobj_wait_entry { + struct task_struct *task; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj_cb syncobj_cb; +}; + +static void syncobj_wait_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, fence_cb); + + wake_up_process(wait->task); +} + +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + struct syncobj_wait_entry *wait = + container_of(cb, struct syncobj_wait_entry, syncobj_cb); + + /* This happens inside the syncobj lock */ + wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct syncobj_wait_entry *entries; + struct dma_fence *fence; + signed long ret; + uint32_t signaled_count, i; + + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + /* Walk the list of sync objects and initialize entries. We do + * this up-front so that we can properly return -EINVAL if there is + * a syncobj with a missing fence and then never have the chance of + * returning -EINVAL again. 
+ */ + signaled_count = 0; + for (i = 0; i < count; ++i) { + entries[i].task = current; + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); + if (!entries[i].fence) { + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + continue; + } else { + ret = -EINVAL; + goto cleanup_entries; + } + } + + if (dma_fence_is_signaled(entries[i].fence)) { + if (signaled_count == 0 && idx) + *idx = i; + signaled_count++; + } + } + + /* Initialize ret to the max of timeout and 1. That way, the + * default return value indicates a successful wait and not a + * timeout. + */ + ret = max_t(signed long, timeout, 1); + + if (signaled_count == count || + (signaled_count > 0 && + !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL))) + goto cleanup_entries; + + /* There's a very annoying laxness in the dma_fence API here, in + * that backends are not required to automatically report when a + * fence is signaled prior to fence->ops->enable_signaling() being + * called. So here if we fail to match signaled_count, we need to + * fallthough and try a 0 timeout wait! + */ + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + for (i = 0; i < count; ++i) { + drm_syncobj_fence_get_or_add_callback(syncobjs[i], + &entries[i].fence, + &entries[i].syncobj_cb, + syncobj_wait_syncobj_func); + } + } + + do { + set_current_state(TASK_INTERRUPTIBLE); + + signaled_count = 0; + for (i = 0; i < count; ++i) { + fence = entries[i].fence; + if (!fence) + continue; + + if (dma_fence_is_signaled(fence) || + (!entries[i].fence_cb.func && + dma_fence_add_callback(fence, + &entries[i].fence_cb, + syncobj_wait_fence_func))) { + /* The fence has been signaled */ + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { + signaled_count++; + } else { + if (idx) + *idx = i; + goto done_waiting; + } + } + } + + if (signaled_count == count) + goto done_waiting; + + if (timeout == 0) { + /* If we are doing a 0 timeout wait and we got + * here, then we just timed out. 
+ */ + ret = 0; + goto done_waiting; + } + + ret = schedule_timeout(ret); + + if (ret > 0 && signal_pending(current)) + ret = -ERESTARTSYS; + } while (ret > 0); + +done_waiting: + __set_current_state(TASK_RUNNING); + +cleanup_entries: + for (i = 0; i < count; ++i) { + if (entries[i].syncobj_cb.func) + drm_syncobj_remove_callback(syncobjs[i], + &entries[i].syncobj_cb); + if (entries[i].fence_cb.func) + dma_fence_remove_callback(entries[i].fence, + &entries[i].fence_cb); + dma_fence_put(entries[i].fence); + } + kfree(entries); + + return ret; +} + /** * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value * @@ -592,43 +776,19 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) return timeout_jiffies64 + 1; } -static int drm_syncobj_wait_fences(struct drm_device *dev, - struct drm_file *file_private, - struct drm_syncobj_wait *wait, - struct dma_fence **fences) +static int drm_syncobj_array_wait(struct drm_device *dev, + struct drm_file *file_private, + struct drm_syncobj_wait *wait, + struct drm_syncobj **syncobjs) { signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); signed long ret = 0; uint32_t first = ~0; - if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { - uint32_t i; - for (i = 0; i < wait->count_handles; i++) { - ret = dma_fence_wait_timeout(fences[i], true, timeout); - - /* Various dma_fence wait callbacks will return - * ENOENT to indicate that the fence has already - * been signaled. We need to sanitize this to 0 so - * we don't return early and the client doesn't see - * an unexpected error. 
- */ - if (ret == -ENOENT) - ret = 0; - - if (ret < 0) - return ret; - if (ret == 0) - break; - timeout = ret; - } - first = 0; - } else { - ret = dma_fence_wait_any_timeout(fences, - wait->count_handles, - true, timeout, - &first); - } - + ret = drm_syncobj_array_wait_timeout(syncobjs, + wait->count_handles, + wait->flags, + timeout, &first); if (ret < 0) return ret; @@ -644,14 +804,15 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_wait *args = data; uint32_t *handles; - struct dma_fence **fences; + struct drm_syncobj **syncobjs; int ret = 0; uint32_t i; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) return -EINVAL; if (args->count_handles == 0) @@ -670,27 +831,28 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, goto err_free_handles; } - fences = kcalloc(args->count_handles, - sizeof(struct dma_fence *), GFP_KERNEL); - if (!fences) { + syncobjs = kcalloc(args->count_handles, + sizeof(struct drm_syncobj *), GFP_KERNEL); + if (!syncobjs) { ret = -ENOMEM; goto err_free_handles; } for (i = 0; i < args->count_handles; i++) { - ret = drm_syncobj_find_fence(file_private, handles[i], - &fences[i]); - if (ret) + syncobjs[i] = drm_syncobj_find(file_private, handles[i]); + if (!syncobjs[i]) { + ret = -ENOENT; goto err_free_fence_array; + } } - ret = drm_syncobj_wait_fences(dev, file_private, - args, fences); + ret = drm_syncobj_array_wait(dev, file_private, + args, syncobjs); err_free_fence_array: - for (i = 0; i < args->count_handles; i++) - dma_fence_put(fences[i]); - kfree(fences); + while (i-- > 0) + drm_syncobj_put(syncobjs[i]); + kfree(syncobjs); err_free_handles: kfree(handles); diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ade7f68d32b5..4c746597225e 100644 --- a/include/uapi/drm/drm.h +++ 
b/include/uapi/drm/drm.h @@ -720,6 +720,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- cgit v1.2.3 From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:27 -0700 Subject: drm/syncobj: Add a reset ioctl (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just resets the dma_fence to NULL so it looks like it's never been signaled. This will be useful once we add the new wait API for allowing wait on "submit and signal" behavior. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Reviewed-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 ++ drivers/gpu/drm/drm_ioctl.c | 2 ++ drivers/gpu/drm/drm_syncobj.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 7 +++++++ 4 files changed, 44 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 534e5ac43bf8..83f1615eb1ec 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -169,3 +169,5 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index b4f443417a28..16c5d51a43aa 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -659,6 +659,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 15e74ca61760..40d2ad293661 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -885,3 +885,36 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, return ret; } + +int +drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_array *args = data; + struct drm_syncobj **syncobjs; + uint32_t i; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad != 0) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + for (i = 0; i < args->count_handles; i++) + drm_syncobj_replace_fence(syncobjs[i], NULL); + + drm_syncobj_array_free(syncobjs, args->count_handles); + + return 0; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4c746597225e..b037fdf9e43b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -731,6 +731,12 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -854,6 +860,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001 
From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:28 -0700 Subject: drm/syncobj: Add a signal ioctl (v3) This IOCTL provides a mechanism for userspace to trigger a sync object directly. There are other ways that userspace can trigger a syncobj such as submitting a dummy batch somewhere or hanging on to a triggered sync_file and doing an import. This just provides an easy way to manually trigger the sync object without weird hacks. The motivation for this IOCTL is Vulkan fences. Vulkan lets you create a fence already in the signaled state so that you can wait on it immediately without stalling. We could also handle this with a new create flag to ask the driver to create a syncobj that is already signaled but the IOCTL seemed a bit cleaner and more generic. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_internal.h | 2 ++ drivers/gpu/drm/drm_ioctl.c | 2 ++ drivers/gpu/drm/drm_syncobj.c | 36 ++++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 1 + 4 files changed, 41 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 83f1615eb1ec..fbc3f308fa19 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -171,3 +171,5 @@ int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 16c5d51a43aa..a9ae6dd2d593 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -661,6 +661,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 40d2ad293661..0422b8c2c2e7 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -918,3 +918,39 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, return 0; } + +int +drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_array *args = data; + struct drm_syncobj **syncobjs; + uint32_t i; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad != 0) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_assign_null_handle(syncobjs[i]); + if (ret < 0) + break; + } + + drm_syncobj_array_free(syncobjs, args->count_handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index b037fdf9e43b..97677cd6964d 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -861,6 +861,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3