diff options
author | Dave Airlie <airlied@redhat.com> | 2021-10-22 06:30:33 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2021-10-22 06:30:34 +1000 |
commit | 6f2f7c83303d2227f47551423e507d77d9ea01c7 (patch) | |
tree | 05dff35a0b96494a2419ff5747f80c4648c10cc4 /include/uapi/drm | |
parent | 94ff371eb8497d02b8cb9d0c2c7370193c98e9f7 (diff) | |
parent | ab5d964c001b9efffcbfa4d67a30186b67d79771 (diff) |
Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Expose multi-LRC submission interface
Similar to the bonded submission interface but simplified.
Comes with GuC only implementation for now. See kerneldoc
for more details.
Userspace changes: https://github.com/intel/media-driver/pull/1252
- Expose logical engine instance to user
Needed by the multi-LRC submission interface for GuC
Userspace changes: https://github.com/intel/media-driver/pull/1252
Driver Changes:
- Fix blank screen booting crashes when CONFIG_CC_OPTIMIZE_FOR_SIZE=y (Hugh)
- Add support for multi-LRC submission in the GuC backend (Matt B)
- Add extra cache flushing before making pages userspace visible (Matt A, Thomas)
- Mark internal GPU object pages dirty so they will be flushed properly (Matt A)
- Move remaining debugfs interfaces i915_wedged/i915_forcewake_user into gt (Andi)
- Replace the unconditional clflushes with drm_clflush_virt_range() (Ville)
- Remove IS_ACTIVE macro completely (Lucas)
- Improve kerneldocs for cache_dirty (Matt A)
- Add missing includes (Lucas)
- Selftest improvements (Matt R, Ran, Matt A)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YXFmLKoq8Fg9JxSd@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'include/uapi/drm')
-rw-r--r-- | include/uapi/drm/i915_drm.h | 139 |
1 files changed, 138 insertions, 1 deletions
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 9b8e61163c39..914ebd9290e5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1830,6 +1830,7 @@ struct drm_i915_gem_context_param { * Extensions: * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) + * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) */ #define I915_CONTEXT_PARAM_ENGINES 0xa @@ -2105,6 +2106,135 @@ struct i915_context_engines_bond { } __attribute__((packed)) name__ /** + * struct i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 to + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior are described below. Lastly, the default is to not allow BBs to be + * preempted mid-batch. Rather insert coordinated preemption points on all + * hardware contexts between each set of BBs. Flags could be added in the future + * to change both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. code-block:: none + * + * Examples syntax: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * + * Example 1 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can be thought of as two virtual engines, each containing two + * engines thereby making a 2D array. However, there are bonds tying the + * entries together and placing restrictions on how they can be scheduled. + * Specifically, the scheduler can choose only vertical columns from the 2D + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice + * versa. Same for CS[2] requires also using CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logically contiguous, return -EINVAL + */ +struct i915_context_engines_parallel_submit { + /** + * @base: base user extension. + */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine or in other words the + * number of batches in each submission + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context or in other words the + * number of possible placements for each submission + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[0]; + +} __packed; + +#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 width; \ + __u16 num_siblings; \ + __u16 mbz16; \ + __u64 flags; \ + __u64 mbz64[3]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/** * DOC: Context Engine Map uAPI * * Context engine map is a new way of addressing engines when submitting batch- @@ -2163,6 +2293,7 @@ struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ struct i915_engine_class_instance engines[0]; } __attribute__((packed)); @@ -2781,14 +2912,20 @@ struct drm_i915_engine_info { /** @flags: Engine flags. */ __u64 flags; +#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) /** @capabilities: Capabilities of this engine. */ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) + /** @logical_instance: Logical instance of engine */ + __u16 logical_instance; + /** @rsvd1: Reserved fields. */ - __u64 rsvd1[4]; + __u16 rsvd1[3]; + /** @rsvd2: Reserved fields. */ + __u64 rsvd2[3]; }; /** |