diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c')
| -rw-r--r-- | drivers/gpu/drm/i915/i915_cmd_parser.c | 268 |
1 files changed, 163 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 806e812340d0..306d9e4e5cf3 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -123,7 +123,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, - .reg = { .offset = 1, .mask = 0x007FFFFC } ), + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, .reg = { .offset = 1, .mask = 0x007FFFFC }, .bits = {{ @@ -395,16 +395,38 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { /* * Register whitelists, sorted by increasing register offset. + */ + +/* + * An individual whitelist entry granting access to register addr. If + * mask is non-zero the argument of immediate register writes will be + * AND-ed with mask, and the command will be rejected if the result + * doesn't match value. + * + * Registers with non-zero mask are only allowed to be written using + * LRI. + */ +struct drm_i915_reg_descriptor { + u32 addr; + u32 mask; + u32 value; +}; + +/* Convenience macro for adding 32-bit registers. */ +#define REG32(address, ...) \ + { .addr = address, __VA_ARGS__ } + +/* + * Convenience macro for adding 64-bit registers. * * Some registers that userspace accesses are 64 bits. The register * access commands only allow 32-bit accesses. Hence, we have to include * entries for both halves of the 64-bit registers. */ +#define REG64(addr) \ + REG32(addr), REG32(addr + sizeof(u32)) -/* Convenience macro for adding 64-bit registers */ -#define REG64(addr) (addr), (addr + sizeof(u32)) - -static const u32 gen7_render_regs[] = { +static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG64(GPGPU_THREADS_DISPATCHED), REG64(HS_INVOCATION_COUNT), REG64(DS_INVOCATION_COUNT), @@ -417,15 +439,15 @@ static const u32 gen7_render_regs[] = { REG64(CL_PRIMITIVES_COUNT), REG64(PS_INVOCATION_COUNT), REG64(PS_DEPTH_COUNT), - OACONTROL, /* Only allowed for LRI and SRM. See below. */ + REG32(OACONTROL), /* Only allowed for LRI and SRM. See below. */ REG64(MI_PREDICATE_SRC0), REG64(MI_PREDICATE_SRC1), - GEN7_3DPRIM_END_OFFSET, - GEN7_3DPRIM_START_VERTEX, - GEN7_3DPRIM_VERTEX_COUNT, - GEN7_3DPRIM_INSTANCE_COUNT, - GEN7_3DPRIM_START_INSTANCE, - GEN7_3DPRIM_BASE_VERTEX, + REG32(GEN7_3DPRIM_END_OFFSET), + REG32(GEN7_3DPRIM_START_VERTEX), + REG32(GEN7_3DPRIM_VERTEX_COUNT), + REG32(GEN7_3DPRIM_INSTANCE_COUNT), + REG32(GEN7_3DPRIM_START_INSTANCE), + REG32(GEN7_3DPRIM_BASE_VERTEX), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), @@ -434,33 +456,41 @@ static const u32 gen7_render_regs[] = { REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)), REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)), REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)), - GEN7_SO_WRITE_OFFSET(0), - GEN7_SO_WRITE_OFFSET(1), - GEN7_SO_WRITE_OFFSET(2), - GEN7_SO_WRITE_OFFSET(3), - GEN7_L3SQCREG1, - GEN7_L3CNTLREG2, - GEN7_L3CNTLREG3, + REG32(GEN7_SO_WRITE_OFFSET(0)), + REG32(GEN7_SO_WRITE_OFFSET(1)), + REG32(GEN7_SO_WRITE_OFFSET(2)), + REG32(GEN7_SO_WRITE_OFFSET(3)), + REG32(GEN7_L3SQCREG1), + REG32(GEN7_L3CNTLREG2), + REG32(GEN7_L3CNTLREG3), + REG32(HSW_SCRATCH1, + .mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE, + .value = 0), + REG32(HSW_ROW_CHICKEN3, + .mask = ~(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE << 16 | + HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + .value = 0), }; -static const u32 gen7_blt_regs[] = { - BCS_SWCTRL, +static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { + REG32(BCS_SWCTRL), }; -static const u32 ivb_master_regs[] = { - FORCEWAKE_MT, - DERRMR, - GEN7_PIPE_DE_LOAD_SL(PIPE_A), - GEN7_PIPE_DE_LOAD_SL(PIPE_B), - GEN7_PIPE_DE_LOAD_SL(PIPE_C), +static const struct drm_i915_reg_descriptor ivb_master_regs[] = { + REG32(FORCEWAKE_MT), + REG32(DERRMR), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), + REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), }; -static const u32 hsw_master_regs[] = { - FORCEWAKE_MT, - DERRMR, +static const struct drm_i915_reg_descriptor hsw_master_regs[] = { + REG32(FORCEWAKE_MT), + REG32(DERRMR), }; #undef REG64 +#undef REG32 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) { @@ -550,14 +580,16 @@ static bool validate_cmds_sorted(struct intel_engine_cs *ring, return ret; } -static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count) +static bool check_sorted(int ring_id, + const struct drm_i915_reg_descriptor *reg_table, + int reg_count) { int i; u32 previous = 0; bool ret = true; for (i = 0; i < reg_count; i++) { - u32 curr = reg_table[i]; + u32 curr = reg_table[i].addr; if (curr < previous) { DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", @@ -804,37 +836,44 @@ find_cmd(struct intel_engine_cs *ring, return default_desc; } -static bool valid_reg(const u32 *table, int count, u32 addr) +static const struct drm_i915_reg_descriptor * +find_reg(const struct drm_i915_reg_descriptor *table, + int count, u32 addr) { - if (table && count != 0) { + if (table) { int i; for (i = 0; i < count; i++) { - if (table[i] == addr) - return true; + if (table[i].addr == addr) + return &table[i]; } } - return false; + return NULL; } -static u32 *vmap_batch(struct drm_i915_gem_object *obj) +static u32 *vmap_batch(struct drm_i915_gem_object *obj, + unsigned start, unsigned len) { int i; void *addr = NULL; struct sg_page_iter sg_iter; + int first_page = start >> PAGE_SHIFT; + int last_page = (len + start + 4095) >> PAGE_SHIFT; + int npages = last_page - first_page; struct page **pages; - pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); + pages = drm_malloc_ab(npages, sizeof(*pages)); if (pages == NULL) { DRM_DEBUG_DRIVER("Failed to get space for pages\n"); goto finish; } i = 0; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { - pages[i] = sg_page_iter_page(&sg_iter); - i++; + for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) { + pages[i++] = sg_page_iter_page(&sg_iter); + if (i == npages) + break; } addr = vmap(pages, i, 0, PAGE_KERNEL); @@ -855,61 +894,56 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, u32 batch_start_offset, u32 batch_len) { - int ret = 0; int needs_clflush = 0; - u32 *src_base, *dest_base = NULL; - u32 *src_addr, *dest_addr; - u32 offset = batch_start_offset / sizeof(*dest_addr); - u32 end = batch_start_offset + batch_len; + void *src_base, *src; + void *dst = NULL; + int ret; - if (end > dest_obj->base.size || end > src_obj->base.size) + if (batch_len > dest_obj->base.size || + batch_len + batch_start_offset > src_obj->base.size) return ERR_PTR(-E2BIG); + if (WARN_ON(dest_obj->pages_pin_count == 0)) + return ERR_PTR(-ENODEV); + ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush); if (ret) { - DRM_DEBUG_DRIVER("CMD: failed to prep read\n"); + DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n"); return ERR_PTR(ret); } - src_base = vmap_batch(src_obj); + src_base = vmap_batch(src_obj, batch_start_offset, batch_len); if (!src_base) { DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); ret = -ENOMEM; goto unpin_src; } - src_addr = src_base + offset; - - if (needs_clflush) - drm_clflush_virt_range((char *)src_addr, batch_len); - ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); if (ret) { - DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n"); + DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); goto unmap_src; } - dest_base = vmap_batch(dest_obj); - if (!dest_base) { + dst = vmap_batch(dest_obj, 0, batch_len); + if (!dst) { DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); ret = -ENOMEM; goto unmap_src; } - dest_addr = dest_base + offset; - - if (batch_start_offset != 0) - memset((u8 *)dest_base, 0, batch_start_offset); + src = src_base + offset_in_page(batch_start_offset); + if (needs_clflush) + drm_clflush_virt_range(src, batch_len); - memcpy(dest_addr, src_addr, batch_len); - memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end); + memcpy(dst, src, batch_len); unmap_src: vunmap(src_base); unpin_src: i915_gem_object_unpin_pages(src_obj); - return ret ? ERR_PTR(ret) : dest_base; + return ret ? ERR_PTR(ret) : dst; } /** @@ -934,7 +968,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) static bool check_cmd(const struct intel_engine_cs *ring, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, + const u32 *cmd, u32 length, const bool is_master, bool *oacontrol_set) { @@ -950,38 +984,70 @@ static bool check_cmd(const struct intel_engine_cs *ring, } if (desc->flags & CMD_DESC_REGISTER) { - u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; - /* - * OACONTROL requires some special handling for writes. We - * want to make sure that any batch which enables OA also - * disables it before the end of the batch. The goal is to - * prevent one process from snooping on the perf data from - * another process. To do that, we need to check the value - * that will be written to the register. Hence, limit - * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. + * Get the distance between individual register offset + * fields if the command can perform more than one + * access at a time. */ - if (reg_addr == OACONTROL) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { - DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); + const u32 step = desc->reg.step ? desc->reg.step : length; + u32 offset; + + for (offset = desc->reg.offset; offset < length; + offset += step) { + const u32 reg_addr = cmd[offset] & desc->reg.mask; + const struct drm_i915_reg_descriptor *reg = + find_reg(ring->reg_table, ring->reg_count, + reg_addr); + + if (!reg && is_master) + reg = find_reg(ring->master_reg_table, + ring->master_reg_count, + reg_addr); + + if (!reg) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", + reg_addr, *cmd, ring->id); return false; } - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) - *oacontrol_set = (cmd[2] != 0); - } + /* + * OACONTROL requires some special handling for + * writes. We want to make sure that any batch which + * enables OA also disables it before the end of the + * batch. The goal is to prevent one process from + * snooping on the perf data from another process. To do + * that, we need to check the value that will be written + * to the register. Hence, limit OACONTROL writes to + * only MI_LOAD_REGISTER_IMM commands. + */ + if (reg_addr == OACONTROL) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); + return false; + } + + if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) + *oacontrol_set = (cmd[offset + 1] != 0); + } - if (!valid_reg(ring->reg_table, - ring->reg_count, reg_addr)) { - if (!is_master || - !valid_reg(ring->master_reg_table, - ring->master_reg_count, - reg_addr)) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, - *cmd, - ring->id); - return false; + /* + * Check the value written to the register against the + * allowed mask/value pair given in the whitelist entry. + */ + if (reg->mask) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", + reg_addr); + return false; + } + + if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && + (offset + 2 > length || + (cmd[offset + 1] & reg->mask) != reg->value)) { + DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", + reg_addr); + return false; + } } } } @@ -1046,34 +1112,26 @@ int i915_parse_cmds(struct intel_engine_cs *ring, u32 batch_len, bool is_master) { - int ret = 0; u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ - - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0); - if (ret) { - DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n"); - return -1; - } + int ret = 0; batch_base = copy_batch(shadow_batch_obj, batch_obj, batch_start_offset, batch_len); if (IS_ERR(batch_base)) { DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); - i915_gem_object_ggtt_unpin(shadow_batch_obj); return PTR_ERR(batch_base); } - cmd = batch_base + (batch_start_offset / sizeof(*cmd)); - /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra * space. Parsing should be faster in some cases this way. */ - batch_end = cmd + (batch_len / sizeof(*batch_end)); + batch_end = batch_base + (batch_len / sizeof(*batch_end)); + cmd = batch_base; while (cmd < batch_end) { const struct drm_i915_cmd_descriptor *desc; u32 length; @@ -1113,7 +1171,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring, break; } - if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { + if (!check_cmd(ring, desc, cmd, length, is_master, + &oacontrol_set)) { ret = -EINVAL; break; } @@ -1132,7 +1191,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring, } vunmap(batch_base); - i915_gem_object_ggtt_unpin(shadow_batch_obj); return ret; } |