aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h181
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c428
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c321
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c14
33 files changed, 1221 insertions, 911 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5c400f4b87fd..5a5f04d0902d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -389,7 +389,6 @@ struct amdgpu_clock {
* Fences.
*/
struct amdgpu_fence_driver {
- struct amdgpu_ring *ring;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
@@ -398,14 +397,13 @@ struct amdgpu_fence_driver {
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
- struct delayed_work lockup_work;
+ struct timer_list fallback_timer;
wait_queue_head_t fence_queue;
};
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
-#define AMDGPU_FENCE_OWNER_MOVE ((void*)2ul)
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
@@ -447,57 +445,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
-signed long amdgpu_fence_wait_any(struct fence **array,
- uint32_t count,
- bool intr,
- signed long t);
-struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
-void amdgpu_fence_unref(struct amdgpu_fence **fence);
-
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
struct amdgpu_ring *ring);
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
struct amdgpu_ring *ring);
-static inline struct amdgpu_fence *amdgpu_fence_later(struct amdgpu_fence *a,
- struct amdgpu_fence *b)
-{
- if (!a) {
- return b;
- }
-
- if (!b) {
- return a;
- }
-
- BUG_ON(a->ring != b->ring);
-
- if (a->seq > b->seq) {
- return a;
- } else {
- return b;
- }
-}
-
-static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
- struct amdgpu_fence *b)
-{
- if (!a) {
- return false;
- }
-
- if (!b) {
- return true;
- }
-
- BUG_ON(a->ring != b->ring);
-
- return a->seq < b->seq;
-}
-
-int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
- void *owner, struct amdgpu_fence **fence);
-
/*
* TTM.
*/
@@ -544,6 +496,7 @@ struct amdgpu_bo_va_mapping {
/* bo virtual addresses in a specific vm */
struct amdgpu_bo_va {
+ struct mutex mutex;
/* protected by bo being reserved */
struct list_head bo_list;
struct fence *last_pt_update;
@@ -586,6 +539,7 @@ struct amdgpu_bo {
/* Constant after initialization */
struct amdgpu_device *adev;
struct drm_gem_object gem_base;
+ struct amdgpu_bo *parent;
struct ttm_bo_kmap_obj dma_buf_vmap;
pid_t pid;
@@ -708,7 +662,7 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
*/
struct amdgpu_sync {
struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
- struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
+ struct fence *sync_to[AMDGPU_MAX_RINGS];
DECLARE_HASHTABLE(fences, 4);
struct fence *last_vm_update;
};
@@ -964,8 +918,8 @@ struct amdgpu_ring {
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
struct amdgpu_vm_pt {
- struct amdgpu_bo *bo;
- uint64_t addr;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
};
struct amdgpu_vm_id {
@@ -973,13 +927,9 @@ struct amdgpu_vm_id {
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct fence *flushed_updates;
- /* last use of vmid */
- struct amdgpu_fence *last_id_use;
};
struct amdgpu_vm {
- struct mutex mutex;
-
struct rb_root va;
/* protecting invalidated */
@@ -1004,24 +954,72 @@ struct amdgpu_vm {
/* for id and flush management per ring */
struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
+ /* for interval tree */
+ spinlock_t it_lock;
+ /* protecting freed */
+ spinlock_t freed_lock;
};
struct amdgpu_vm_manager {
- struct amdgpu_fence *active[AMDGPU_NUM_VM];
- uint32_t max_pfn;
+ struct {
+ struct fence *active;
+ atomic_long_t owner;
+ } ids[AMDGPU_NUM_VM];
+
+ uint32_t max_pfn;
/* number of VMIDs */
- unsigned nvm;
+ unsigned nvm;
/* vram base address for page table entry */
- u64 vram_base_offset;
+ u64 vram_base_offset;
/* is vm enabled? */
- bool enabled;
- /* for hw to save the PD addr on suspend/resume */
- uint32_t saved_table_addr[AMDGPU_NUM_VM];
+ bool enabled;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_funcs_ring;
};
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct list_head *head);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring,
+ struct amdgpu_vm *vm,
+ struct fence *updates);
+void amdgpu_vm_fence(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct fence *fence);
+uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ struct ttm_mem_reg *mem);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);
+
/*
* context related structures
*/
@@ -1235,6 +1233,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src, unsigned irq_type,
enum amdgpu_ring_type ring_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
/*
* CS.
@@ -1257,6 +1256,7 @@ struct amdgpu_cs_parser {
/* relocations */
struct amdgpu_bo_list_entry *vm_bos;
struct list_head validated;
+ struct fence *fence;
struct amdgpu_ib *ibs;
uint32_t num_ibs;
@@ -1272,7 +1272,7 @@ struct amdgpu_job {
struct amdgpu_device *adev;
struct amdgpu_ib *ibs;
uint32_t num_ibs;
- struct mutex job_lock;
+ void *owner;
struct amdgpu_user_fence uf;
int (*free_job)(struct amdgpu_job *job);
};
@@ -1655,6 +1655,7 @@ struct amdgpu_pm {
u8 fan_max_rpm;
/* dpm */
bool dpm_enabled;
+ bool sysfs_initialized;
struct amdgpu_dpm dpm;
const struct firmware *fw; /* SMC firmware */
uint32_t fw_version;
@@ -1758,11 +1759,11 @@ void amdgpu_test_syncing(struct amdgpu_device *adev);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
-static int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
return -ENODEV;
}
-static void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
+static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
#endif
/*
@@ -2302,11 +2303,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx,
- struct amdgpu_ib *ibs,
- uint32_t num_ibs);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2364,49 +2360,6 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
/*
- * vm
- */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct list_head *head);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- struct ttm_mem_reg *mem);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr, uint64_t offset,
- uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
-/*
* functions used by amdgpu_encoder.c
*/
struct amdgpu_afmt_acr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 77f1d7c6ea3a..9416e0f5c1db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -672,8 +672,12 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
/* disp clock */
adev->clock.default_dispclk =
le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
- if (adev->clock.default_dispclk == 0)
- adev->clock.default_dispclk = 54000; /* 540 Mhz */
+ /* set a reasonable default for DP */
+ if (adev->clock.default_dispclk < 53900) {
+ DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 60000;
+ }
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
adev->clock.current_dispclk = adev->clock.default_dispclk;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 27ef52847e6d..4f352ec9dec4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx,
- struct amdgpu_ib *ibs,
- uint32_t num_ibs)
-{
- struct amdgpu_cs_parser *parser;
- int i;
-
- parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
- if (!parser)
- return NULL;
-
- parser->adev = adev;
- parser->filp = filp;
- parser->ctx = ctx;
- parser->ibs = ibs;
- parser->num_ibs = num_ibs;
- for (i = 0; i < num_ibs; i++)
- ibs[i].ctx = ctx;
-
- return parser;
-}
-
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
union drm_amdgpu_cs *cs = data;
@@ -246,6 +222,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
p->uf.bo = gem_to_amdgpu_bo(gobj);
+ amdgpu_bo_ref(p->uf.bo);
+ drm_gem_object_unreference_unlocked(gobj);
p->uf.offset = fence_data->offset;
} else {
ret = -EINVAL;
@@ -463,8 +441,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
-static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser: parser structure holding parsing context.
+ * @error: error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
+ unsigned i;
+
if (!error) {
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
@@ -479,17 +467,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket,
- &parser->validated,
- &parser->ibs[parser->num_ibs-1].fence->base);
+ &parser->validated,
+ parser->fence);
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
-}
+ fence_put(parser->fence);
-static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
-{
- unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
if (parser->bo_list)
@@ -499,31 +484,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (!amdgpu_enable_scheduler)
- {
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- if (parser->uf.bo)
- drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
- }
-
- kfree(parser);
-}
-
-/**
- * cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
-{
- amdgpu_cs_parser_fini_early(parser, error, backoff);
- amdgpu_cs_parser_fini_late(parser);
+ if (parser->ibs)
+ for (i = 0; i < parser->num_ibs; i++)
+ amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+ kfree(parser->ibs);
+ if (parser->uf.bo)
+ amdgpu_bo_unref(&parser->uf.bo);
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -610,15 +576,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
}
r = amdgpu_bo_vm_update_pte(parser, vm);
- if (r) {
- goto out;
- }
- amdgpu_cs_sync_rings(parser);
- if (!amdgpu_enable_scheduler)
- r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
- parser->filp);
+ if (!r)
+ amdgpu_cs_sync_rings(parser);
-out:
return r;
}
@@ -818,7 +778,7 @@ static int amdgpu_cs_free_job(struct amdgpu_job *job)
amdgpu_ib_free(job->adev, &job->ibs[i]);
kfree(job->ibs);
if (job->uf.bo)
- drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+ amdgpu_bo_unref(&job->uf.bo);
return 0;
}
@@ -826,38 +786,35 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_cs_parser *parser;
+ struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
int i, r;
if (!adev->accel_working)
return -EBUSY;
- parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
- if (!parser)
- return -ENOMEM;
- r = amdgpu_cs_parser_init(parser, data);
+ parser.adev = adev;
+ parser.filp = filp;
+
+ r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
- amdgpu_cs_parser_fini(parser, r, false);
+ amdgpu_cs_parser_fini(&parser, r, false);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
- mutex_lock(&vm->mutex);
- r = amdgpu_cs_parser_relocs(parser);
+ r = amdgpu_cs_parser_relocs(&parser);
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r && r != -ERESTARTSYS)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
else if (!r) {
reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, parser);
+ r = amdgpu_cs_ib_fill(adev, &parser);
}
if (!r) {
- r = amdgpu_cs_dependencies(adev, parser);
+ r = amdgpu_cs_dependencies(adev, &parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
@@ -865,61 +822,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
- for (i = 0; i < parser->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ for (i = 0; i < parser.num_ibs; i++)
+ trace_amdgpu_cs(&parser, i);
- r = amdgpu_cs_ib_vm_chunk(adev, parser);
+ r = amdgpu_cs_ib_vm_chunk(adev, &parser);
if (r)
goto out;
- if (amdgpu_enable_scheduler && parser->num_ibs) {
+ if (amdgpu_enable_scheduler && parser.num_ibs) {
+ struct amdgpu_ring * ring = parser.ibs->ring;
+ struct amd_sched_fence *fence;
struct amdgpu_job *job;
- struct amdgpu_ring * ring = parser->ibs->ring;
+
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
- if (!job)
- return -ENOMEM;
+ if (!job) {
+ r = -ENOMEM;
+ goto out;
+ }
+
job->base.sched = &ring->sched;
- job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
- job->adev = parser->adev;
- job->ibs = parser->ibs;
- job->num_ibs = parser->num_ibs;
- job->base.owner = parser->filp;
- mutex_init(&job->job_lock);
+ job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
+ job->adev = parser.adev;
+ job->owner = parser.filp;
+ job->free_job = amdgpu_cs_free_job;
+
+ job->ibs = parser.ibs;
+ job->num_ibs = parser.num_ibs;
+ parser.ibs = NULL;
+ parser.num_ibs = 0;
+
if (job->ibs[job->num_ibs - 1].user) {
- memcpy(&job->uf, &parser->uf,
- sizeof(struct amdgpu_user_fence));
+ job->uf = parser.uf;
job->ibs[job->num_ibs - 1].user = &job->uf;
+ parser.uf.bo = NULL;
}
- job->free_job = amdgpu_cs_free_job;
- mutex_lock(&job->job_lock);
- r = amd_sched_entity_push_job(&job->base);
- if (r) {
- mutex_unlock(&job->job_lock);
+ fence = amd_sched_fence_create(job->base.s_entity,
+ parser.filp);
+ if (!fence) {
+ r = -ENOMEM;
amdgpu_cs_free_job(job);
kfree(job);
goto out;
}
- cs->out.handle =
- amdgpu_ctx_add_fence(parser->ctx, ring,
- &job->base.s_fence->base);
- parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+ job->base.s_fence = fence;
+ parser.fence = fence_get(&fence->base);
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
- ttm_eu_fence_buffer_objects(&parser->ticket,
- &parser->validated,
- &job->base.s_fence->base);
+ cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
+ &fence->base);
+ job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
- mutex_unlock(&job->job_lock);
- amdgpu_cs_parser_fini_late(parser);
- mutex_unlock(&vm->mutex);
- return 0;
+ trace_amdgpu_cs_ioctl(job);
+ amd_sched_entity_push_job(&job->base);
+
+ } else {
+ struct amdgpu_fence *fence;
+
+ r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
+ parser.filp);
+ fence = parser.ibs[parser.num_ibs - 1].fence;
+ parser.fence = fence_get(&fence->base);
+ cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
}
- cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
- amdgpu_cs_parser_fini(parser, r, reserved_buffers);
- mutex_unlock(&vm->mutex);
+ amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e0b80ccdfe8a..fec65f01c031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -69,6 +69,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
+ if (!adev)
+ return;
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
fence_put(ctx->rings[i].fences[j]);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d747fca59669..5580d3420c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -73,6 +73,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpuCrtc->base;
unsigned long flags;
unsigned i;
+ int vpos, hpos, stat, min_udelay;
+ struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
amdgpu_flip_wait_fence(adev, &work->excl);
for (i = 0; i < work->shared_count; ++i)
@@ -81,8 +83,41 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
- /* set the proper interrupt */
- amdgpu_irq_get(adev, &adev->pageflip_irq, work->crtc_id);
+ /* If this happens to execute within the "virtually extended" vblank
+ * interval before the start of the real vblank interval then it needs
+ * to delay programming the mmio flip until the real vblank is entered.
+ * This prevents completing a flip too early due to the way we fudge
+ * our vblank counter and vblank timestamps in order to work around the
+ * problem that the hw fires vblank interrupts before actual start of
+ * vblank (when line buffer refilling is done for a frame). It
+ * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for
+ * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts.
+ *
+ * In practice this won't execute very often unless on very fast
+ * machines because the time window for this to happen is very small.
+ */
+ for (;;) {
+ /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
+ * start in hpos, and to the "fudged earlier" vblank start in
+ * vpos.
+ */
+ stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id,
+ GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode);
+
+ if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
+ !(vpos >= 0 && hpos <= 0))
+ break;
+
+ /* Sleep at least until estimated real start of hw vblank */
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ usleep_range(min_udelay, 2 * min_udelay);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ };
+
/* do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
@@ -111,7 +146,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
} else
DRM_ERROR("failed to reserve buffer after flip\n");
- drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+ amdgpu_bo_unref(&work->old_rbo);
kfree(work->shared);
kfree(work);
}
@@ -150,8 +185,8 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
obj = old_amdgpu_fb->obj;
/* take a reference to the old object */
- drm_gem_object_reference(obj);
work->old_rbo = gem_to_amdgpu_bo(obj);
+ amdgpu_bo_ref(work->old_rbo);
new_amdgpu_fb = to_amdgpu_framebuffer(fb);
obj = new_amdgpu_fb->obj;
@@ -181,10 +216,6 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
goto cleanup;
}
- fence_get(work->excl);
- for (i = 0; i < work->shared_count; ++i)
- fence_get(work->shared[i]);
-
amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
amdgpu_bo_unreserve(new_rbo);
@@ -228,7 +259,7 @@ pflip_cleanup:
amdgpu_bo_unreserve(new_rbo);
cleanup:
- drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+ amdgpu_bo_unref(&work->old_rbo);
fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
fence_put(work->shared[i]);
@@ -718,6 +749,15 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
* \param dev Device to query.
* \param pipe Crtc to query.
* \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * For driver internal use only also supports these flags:
+ *
+ * USE_REAL_VBLANKSTART to use the real start of vblank instead
+ * of a fudged earlier start of vblank.
+ *
+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ * fudged earlier start of vblank in *vpos and the distance
+ * to true start of vblank in *hpos.
+ *
* \param *vpos Location where vertical scanout position should be stored.
* \param *hpos Location where horizontal scanout position should go.
* \param *stime Target location for timestamp taken immediately before
@@ -782,10 +822,40 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
vbl_end = 0;
}
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
+ }
+
+ /* Fudge vblank to start a few scanlines earlier to handle the
+ * problem that vblank irqs fire a few scanlines before start
+ * of vblank. Some driver internal callers need the true vblank
+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+ *
+ * The cause of the "early" vblank irq is that the irq is triggered
+ * by the line buffer logic when the line buffer read position enters
+ * the vblank, whereas our crtc scanout position naturally lags the
+ * line buffer read position.
+ */
+ if (!(flags & USE_REAL_VBLANKSTART))
+ vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
/* Test scanout position against vblank region. */
if ((*vpos < vbl_start) && (*vpos >= vbl_end))
in_vbl = false;
+ /* In vblank? */
+ if (in_vbl)
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from fudged earlier vbl_start */
+ *vpos -= vbl_start;
+ return ret;
+ }
+
/* Check if inside vblank area and apply corrective offsets:
* vpos will then be >=0 in video scanout area, but negative
* within vblank area, counting down the number of lines until
@@ -801,32 +871,6 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
/* Correct for shifted end of vbl at vbl_end. */
*vpos = *vpos - vbl_end;
- /* In vblank? */
- if (in_vbl)
- ret |= DRM_SCANOUTPOS_IN_VBLANK;
-
- /* Is vpos outside nominal vblank area, but less than
- * 1/100 of a frame height away from start of vblank?
- * If so, assume this isn't a massively delayed vblank
- * interrupt, but a vblank interrupt that fired a few
- * microseconds before true start of vblank. Compensate
- * by adding a full frame duration to the final timestamp.
- * Happens, e.g., on ATI R500, R600.
- *
- * We only do this if DRM_CALLED_FROM_VBLIRQ.
- */
- if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
- vbl_start = mode->crtc_vdisplay;
- vtotal = mode->crtc_vtotal;
-
- if (vbl_start - *vpos < vtotal / 100) {
- *vpos -= vtotal;
-
- /* Signal this correction as "applied". */
- ret |= 0x8;
- }
- }
-
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 96290d9cddca..093a8c618931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
}
info->par = rfbdev;
+ info->skip_vt_switch = true;
ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 003a219943f1..3671f9f220bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
* that the the relevant GPU caches have been flushed.
*/
+static struct kmem_cache *amdgpu_fence_slab;
+static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -85,24 +88,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_schedule_check - schedule lockup check
- *
- * @ring: pointer to struct amdgpu_ring
- *
- * Queues a delayed work item to check for lockups.
- */
-static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
-{
- /*
- * Do not reset the timer here with mod_delayed_work,
- * this can livelock in an interaction with TTM delayed destroy.
- */
- queue_delayed_work(system_power_efficient_wq,
- &ring->fence_drv.lockup_work,
- AMDGPU_FENCE_JIFFIES_TIMEOUT);
-}
-
-/**
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
struct amdgpu_device *adev = ring->adev;
/* we are protected by the ring emission mutex */
- *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
if ((*fence) == NULL) {
return -ENOMEM;
}
@@ -132,44 +117,20 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
(*fence)->seq,
AMDGPU_FENCE_FLAG_INT);
- trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
return 0;
}
/**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_schedule_fallback - schedule fallback check
*
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as fallback to our interrupts.
*/
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
- struct amdgpu_fence *fence;
- struct amdgpu_device *adev;
- u64 seq;
- int ret;
-
- fence = container_of(wait, struct amdgpu_fence, fence_wake);
- adev = fence->ring->adev;
-
- /*
- * We cannot use amdgpu_fence_process here because we're already
- * in the waitqueue, in a call from wake_up_all.
- */
- seq = atomic64_read(&fence->ring->fence_drv.last_seq);
- if (seq >= fence->seq) {
- ret = fence_signal_locked(&fence->base);
- if (!ret)
- FENCE_TRACE(&fence->base, "signaled from irq context\n");
- else
- FENCE_TRACE(&fence->base, "was already signaled\n");
-
- __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_put(&fence->base);
- } else
- FENCE_TRACE(&fence->base, "pending\n");
- return 0;
+ mod_timer(&ring->fence_drv.fallback_timer,
+ jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
/**
@@ -238,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
if (seq < last_emitted)
- amdgpu_fence_schedule_check(ring);
+ amdgpu_fence_schedule_fallback(ring);
return wake;
}
/**
- * amdgpu_fence_check_lockup - check for hardware lockup
+ * amdgpu_fence_process - process a fence
*
- * @work: delayed work item
+ * @adev: amdgpu_device pointer
+ * @ring: ring index the fence is associated with
*
- * Checks for fence activity and if there is none probe
- * the hardware if a lockup occured.
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
*/
-static void amdgpu_fence_check_lockup(struct work_struct *work)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- struct amdgpu_fence_driver *fence_drv;
- struct amdgpu_ring *ring;
-
- fence_drv = container_of(work, struct amdgpu_fence_driver,
- lockup_work.work);
- ring = fence_drv->ring;
-
if (amdgpu_fence_activity(ring))
wake_up_all(&ring->fence_drv.fence_queue);
}
/**
- * amdgpu_fence_process - process a fence
+ * amdgpu_fence_fallback - fallback for hardware interrupts
*
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
+ * @work: delayed work item
*
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
+ * Checks for fence activity.
*/
-void amdgpu_fence_process(struct amdgpu_ring *ring)
+static void amdgpu_fence_fallback(unsigned long arg)
{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ struct amdgpu_ring *ring = (void *)arg;
+
+ amdgpu_fence_process(ring);
}
/**
@@ -305,47 +259,6 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
return false;
}
-static bool amdgpu_fence_is_signaled(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
-
- amdgpu_fence_process(ring);
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
-
- return false;
-}
-
-/**
- * amdgpu_fence_enable_signaling - enable signalling on fence
- * @fence: fence
- *
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
- */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return false;
-
- fence->fence_wake.flags = 0;
- fence->fence_wake.private = NULL;
- fence->fence_wake.func = amdgpu_fence_check_signaled;
- __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_get(f);
- FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return true;
-}
-
/*
* amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
* @ring: ring to wait on for the seq number
@@ -367,6 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
return 0;
+ amdgpu_fence_schedule_fallback(ring);
wait_event(ring->fence_drv.fence_queue, (
(signaled = amdgpu_fence_seq_signaled(ring, seq))));
@@ -417,36 +331,6 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_ref - take a ref on a fence
- *
- * @fence: amdgpu fence object
- *
- * Take a reference on a fence (all asics).
- * Returns the fence.
- */
-struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
-{
- fence_get(&fence->base);
- return fence;
-}
-
-/**
- * amdgpu_fence_unref - remove a ref on a fence
- *
- * @fence: amdgpu fence object
- *
- * Remove a reference on a fence (all asics).
- */
-void amdgpu_fence_unref(struct amdgpu_fence **fence)
-{
- struct amdgpu_fence *tmp = *fence;
-
- *fence = NULL;
- if (tmp)
- fence_put(&tmp->base);
-}
-
-/**
* amdgpu_fence_count_emitted - get the count of emitted fences
*
* @ring: ring the fence is associated with
@@ -597,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
atomic64_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
- INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
- amdgpu_fence_check_lockup);
- ring->fence_drv.ring = ring;
+ setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+ (unsigned long)ring);
init_waitqueue_head(&ring->fence_drv.fence_queue);
@@ -642,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
*/
int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{
+ if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
+ amdgpu_fence_slab = kmem_cache_create(
+ "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_fence_slab)
+ return -ENOMEM;
+ }
if (amdgpu_debugfs_fence_init(adev))
dev_err(adev->dev, "fence debugfs file creation failed\n");
@@ -660,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
int i, r;
+ if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+ kmem_cache_destroy(amdgpu_fence_slab);
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
r = amdgpu_fence_wait_empty(ring);
@@ -674,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
+ del_timer_sync(&ring->fence_drv.fallback_timer);
ring->fence_drv.initialized = false;
}
mutex_unlock(&adev->ring_lock);
@@ -761,6 +655,122 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
}
}
+/*
+ * Common fence implementation
+ */
+
+static const char *amdgpu_fence_get_driver_name(struct fence *fence)
+{
+ return "amdgpu";
+}
+
+static const char *amdgpu_fence_get_timeline_name(struct fence *f)
+{
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ return (const char *)fence->ring->name;
+}
+
+/**
+ * amdgpu_fence_is_signaled - test if fence is signaled
+ *
+ * @f: fence to test
+ *
+ * Test the fence sequence number if it is already signaled. If it isn't
+ * signaled start fence processing. Returns True if the fence is signaled.
+ */
+static bool amdgpu_fence_is_signaled(struct fence *f)
+{
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ struct amdgpu_ring *ring = fence->ring;
+
+ if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+ return true;
+
+ amdgpu_fence_process(ring);
+
+ if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_fence_check_signaled - callback from fence_queue
+ *
+ * this function is called with fence_queue lock held, which is also used
+ * for the fence locking itself, so unlocked variants are used for
+ * fence_signal, and remove_wait_queue.
+ */
+static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+ struct amdgpu_fence *fence;
+ struct amdgpu_device *adev;
+ u64 seq;
+ int ret;
+
+ fence = container_of(wait, struct amdgpu_fence, fence_wake);
+ adev = fence->ring->adev;
+
+ /*
+ * We cannot use amdgpu_fence_process here because we're already
+ * in the waitqueue, in a call from wake_up_all.
+ */
+ seq = atomic64_read(&fence->ring->fence_drv.last_seq);
+ if (seq >= fence->seq) {
+ ret = fence_signal_locked(&fence->base);
+ if (!ret)
+ FENCE_TRACE(&fence->base, "signaled from irq context\n");
+ else
+ FENCE_TRACE(&fence->base, "was already signaled\n");
+
+ __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
+ fence_put(&fence->base);
+ } else
+ FENCE_TRACE(&fence->base, "pending\n");
+ return 0;
+}
+
+/**
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool amdgpu_fence_enable_signaling(struct fence *f)
+{
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ struct amdgpu_ring *ring = fence->ring;
+
+ if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+ return false;
+
+ fence->fence_wake.flags = 0;
+ fence->fence_wake.private = NULL;
+ fence->fence_wake.func = amdgpu_fence_check_signaled;
+ __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
+ fence_get(f);
+ if (!timer_pending(&ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(ring);
+ FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+ return true;
+}
+
+static void amdgpu_fence_release(struct fence *f)
+{
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ kmem_cache_free(amdgpu_fence_slab, fence);
+}
+
+const struct fence_ops amdgpu_fence_ops = {
+ .get_driver_name = amdgpu_fence_get_driver_name,
+ .get_timeline_name = amdgpu_fence_get_timeline_name,
+ .enable_signaling = amdgpu_fence_enable_signaling,
+ .signaled = amdgpu_fence_is_signaled,
+ .wait = fence_default_wait,
+ .release = amdgpu_fence_release,
+};
/*
* Fence debugfs
@@ -811,131 +821,3 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
#endif
}
-static const char *amdgpu_fence_get_driver_name(struct fence *fence)
-{
- return "amdgpu";
-}
-
-static const char *amdgpu_fence_get_timeline_name(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- return (const char *)fence->ring->name;
-}
-
-static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
-{
- return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
-}
-
-static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
-{
- int idx;
- struct fence *fence;
-
- for (idx = 0; idx < count; ++idx) {
- fence = fences[idx];
- if (fence) {
- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
- return true;
- }
- }
- return false;
-}
-
-struct amdgpu_wait_cb {
- struct fence_cb base;
- struct task_struct *task;
-};
-
-static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
-{
- struct amdgpu_wait_cb *wait =
- container_of(cb, struct amdgpu_wait_cb, base);
- wake_up_process(wait->task);
-}
-
-static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
- signed long t)
-{
- return amdgpu_fence_wait_any(&f, 1, intr, t);
-}
-
-/**
- * Wait the fence array with timeout
- *
- * @array: the fence array with amdgpu fence pointer
- * @count: the number of the fence array
- * @intr: when sleep, set the current task interruptable or not
- * @t: timeout to wait
- *
- * It will return when any fence is signaled or timeout.
- */
-signed long amdgpu_fence_wait_any(struct fence **array, uint32_t count,
- bool intr, signed long t)
-{
- struct amdgpu_wait_cb *cb;
- struct fence *fence;
- unsigned idx;
-
- BUG_ON(!array);
-
- cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
- if (cb == NULL) {
- t = -ENOMEM;
- goto err_free_cb;
- }
-
- for (idx = 0; idx < count; ++idx) {
- fence = array[idx];
- if (fence) {
- cb[idx].task = current;
- if (fence_add_callback(fence,
- &cb[idx].base, amdgpu_fence_wait_cb)) {
- /* The fence is already signaled */
- goto fence_rm_cb;
- }
- }
- }
-
- while (t > 0) {
- if (intr)
- set_current_state(TASK_INTERRUPTIBLE);
- else
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- /*
- * amdgpu_test_signaled_any must be called after
- * set_current_state to prevent a race with wake_up_process
- */
- if (amdgpu_test_signaled_any(array, count))
- break;
-
- t = schedule_timeout(t);
-
- if (t > 0 && intr && signal_pending(current))
- t = -ERESTARTSYS;
- }
-
- __set_current_state(TASK_RUNNING);
-
-fence_rm_cb:
- for (idx = 0; idx < count; ++idx) {
- fence = array[idx];
- if (fence && cb[idx].base.func)
- fence_remove_callback(fence, &cb[idx].base);
- }
-
-err_free_cb:
- kfree(cb);
-
- return t;
-}
-
-const struct fence_ops amdgpu_fence_ops = {
- .get_driver_name = amdgpu_fence_get_driver_name,
- .get_timeline_name = amdgpu_fence_get_timeline_name,
- .enable_signaling = amdgpu_fence_enable_signaling,
- .signaled = amdgpu_fence_is_signaled,
- .wait = amdgpu_fence_default_wait,
- .release = NULL,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 087332858853..f6ea4b43a60c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -115,12 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
int r;
- mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, false);
- if (r) {
- mutex_unlock(&vm->mutex);
+ if (r)
return r;
- }
bo_va = amdgpu_vm_bo_find(vm, rbo);
if (!bo_va) {
@@ -129,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
++bo_va->ref_count;
}
amdgpu_bo_unreserve(rbo);
- mutex_unlock(&vm->mutex);
return 0;
}
@@ -142,10 +138,8 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
int r;
- mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, true);
if (r) {
- mutex_unlock(&vm->mutex);
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
@@ -157,7 +151,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
}
}
amdgpu_bo_unreserve(rbo);
- mutex_unlock(&vm->mutex);
}
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -242,8 +235,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_USERPTR_REGISTER))
return -EINVAL;
- if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
- !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
+ !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
/* if we want to write to it we must require anonymous
memory and install a MMU notifier */
@@ -483,6 +477,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (domain == AMDGPU_GEM_DOMAIN_CPU)
goto error_unreserve;
}
+ r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+ if (r)
+ goto error_unreserve;
r = amdgpu_vm_clear_freed(adev, bo_va->vm);
if (r)
@@ -512,6 +509,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *rbo;
struct amdgpu_bo_va *bo_va;
+ struct ttm_validate_buffer tv, tv_pd;
+ struct ww_acquire_ctx ticket;
+ struct list_head list, duplicates;
uint32_t invalid_flags, va_flags = 0;
int r = 0;
@@ -547,19 +547,28 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL)
return -ENOENT;
- mutex_lock(&fpriv->vm.mutex);
rbo = gem_to_amdgpu_bo(gobj);
- r = amdgpu_bo_reserve(rbo, false);
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
+ tv.bo = &rbo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ if (args->operation == AMDGPU_VA_OP_MAP) {
+ tv_pd.bo = &fpriv->vm.page_directory->tbo;
+ tv_pd.shared = true;
+ list_add(&tv_pd.head, &list);
+ }
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) {
- mutex_unlock(&fpriv->vm.mutex);
drm_gem_object_unreference_unlocked(gobj);
return r;
}
bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
if (!bo_va) {
- amdgpu_bo_unreserve(rbo);
- mutex_unlock(&fpriv->vm.mutex);
+ ttm_eu_backoff_reservation(&ticket, &list);
+ drm_gem_object_unreference_unlocked(gobj);
return -ENOENT;
}
@@ -581,10 +590,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
-
+ ttm_eu_backoff_reservation(&ticket, &list);
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
- mutex_unlock(&fpriv->vm.mutex);
+
drm_gem_object_unreference_unlocked(gobj);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index aad4c1c69448..9e25edafa721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
int r;
if (size) {
- r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&ib->sa_bo, size, 256);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -95,7 +95,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{
amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
- amdgpu_fence_unref(&ib->fence);
+ if (ib->fence)
+ fence_put(&ib->fence->base);
}
/**
@@ -215,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
}
if (ib->vm)
- amdgpu_vm_fence(adev, ib->vm, ib->fence);
+ amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
amdgpu_ring_unlock_commit(ring);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1618e2294a16..e23843f4d877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -611,13 +611,59 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev,
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
{
struct amdgpu_device *adev = dev->dev_private;
+ int vpos, hpos, stat;
+ u32 count;
if (pipe >= adev->mode_info.num_crtc) {
DRM_ERROR("Invalid crtc %u\n", pipe);
return -EINVAL;
}
- return amdgpu_display_vblank_get_counter(adev, pipe);
+ /* The hw increments its frame counter at start of vsync, not at start
+ * of vblank, as is required by DRM core vblank counter handling.
+ * Cook the hw count here to make it appear to the caller as if it
+ * incremented at start of vblank. We measure distance to start of
+ * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+ * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+ * result by 1 to give the proper appearance to caller.
+ */
+ if (adev->mode_info.crtcs[pipe]) {
+ /* Repeat readout if needed to provide stable result if
+ * we cross start of vsync during the queries.
+ */
+ do {
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ /* Ask amdgpu_get_crtc_scanoutpos to return vpos as
+ * distance to start of vblank, instead of regular
+ * vertical scanout pos.
+ */
+ stat = amdgpu_get_crtc_scanoutpos(
+ dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &adev->mode_info.crtcs[pipe]->base.hwmode);
+ } while (count != amdgpu_display_vblank_get_counter(adev, pipe));
+
+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+ } else {
+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+ pipe, vpos);
+
+ /* Bump counter if we are at >= leading edge of vblank,
+ * but before vsync where vpos would turn negative and
+ * the hw counter really increments.
+ */
+ if (vpos >= 0)
+ count++;
+ }
+ } else {
+ /* Fallback to use value as is. */
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+ }
+
+ return count;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index b62c1710cab6..064ebb347074 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -407,6 +407,7 @@ struct amdgpu_crtc {
u32 line_time;
u32 wm_low;
u32 wm_high;
+ u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
};
@@ -528,6 +529,10 @@ struct amdgpu_framebuffer {
#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
((em) == ATOM_ENCODER_MODE_DP_MST))
+/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+#define USE_REAL_VBLANKSTART (1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
+
void amdgpu_link_encoder_connector(struct drm_device *dev);
struct drm_connector *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0d524384ff79..c3ce103b6a33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -100,6 +100,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
drm_gem_object_release(&bo->gem_base);
+ amdgpu_bo_unref(&bo->parent);
kfree(bo->metadata);
kfree(bo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff4567798..ea756e77b023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct amdgpu_sa_bo **sa_bo,
+ unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
struct fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index efed11509f4a..22a8c7d3a3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -294,10 +294,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
- /* Skip limit attributes if DPM is not enabled */
+ /* Skip attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
(attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
- attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr))
+ attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
/* Skip fan attributes if fan is not present */
@@ -691,6 +695,9 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
{
int ret;
+ if (adev->pm.sysfs_initialized)
+ return 0;
+
if (adev->pm.funcs->get_temperature == NULL)
return 0;
adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
@@ -719,6 +726,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
+ adev->pm.sysfs_initialized = true;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b2df348aa223..78e9b0f14661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -436,6 +436,30 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
}
}
+/**
+ * amdgpu_ring_from_fence - get ring from fence
+ *
+ * @f: fence structure
+ *
+ * Extract the ring a fence belongs to. Handles both scheduler as
+ * well as hardware fences.
+ */
+struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
+{
+ struct amdgpu_fence *a_fence;
+ struct amd_sched_fence *s_fence;
+
+ s_fence = to_amd_sched_fence(f);
+ if (s_fence)
+ return container_of(s_fence->sched, struct amdgpu_ring, sched);
+
+ a_fence = to_amdgpu_fence(f);
+ if (a_fence)
+ return a_fence->ring;
+
+ return NULL;
+}
+
/*
* Debugfs info
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 5cb27d525e43..8b88edb0434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,25 +139,6 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
return r;
}
-static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
-{
- struct amdgpu_fence *a_fence;
- struct amd_sched_fence *s_fence;
-
- s_fence = to_amd_sched_fence(f);
- if (s_fence) {
- struct amdgpu_ring *ring;
-
- ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
- return ring->idx;
- }
-
- a_fence = to_amdgpu_fence(f);
- if (a_fence)
- return a_fence->ring->idx;
- return 0;
-}
-
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -318,7 +299,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
}
if (best_bo) {
- uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
+ uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
++tries[idx];
sa_manager->hole = best_bo->olist.prev;
@@ -330,13 +311,13 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
return false;
}
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager,
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
struct fence *fences[AMDGPU_MAX_RINGS];
unsigned tries[AMDGPU_MAX_RINGS];
+ unsigned count;
int i, r;
signed long t;
@@ -371,13 +352,18 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
/* see if we can skip over some allocations */
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
- spin_unlock(&sa_manager->wq.lock);
- t = amdgpu_fence_wait_any(fences, AMDGPU_MAX_RINGS,
- false, MAX_SCHEDULE_TIMEOUT);
- r = (t > 0) ? 0 : t;
- spin_lock(&sa_manager->wq.lock);
- /* if we have nothing to wait for block */
- if (r == -ENOENT) {
+ for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+ if (fences[i])
+ fences[count++] = fences[i];
+
+ if (count) {
+ spin_unlock(&sa_manager->wq.lock);
+ t = fence_wait_any_timeout(fences, count, false,
+ MAX_SCHEDULE_TIMEOUT);
+ r = (t > 0) ? 0 : t;
+ spin_lock(&sa_manager->wq.lock);
+ } else {
+ /* if we have nothing to wait for block */
r = wait_event_interruptible_locked(
sa_manager->wq,
amdgpu_sa_event(sa_manager, size, align)
@@ -406,7 +392,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
if (fence && !fence_is_signaled(fence)) {
uint32_t idx;
(*sa_bo)->fence = fence_get(fence);
- idx = amdgpu_sa_get_ring_from_fence(fence);
+ idx = amdgpu_ring_from_fence(fence)->idx;
list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else {
amdgpu_sa_bo_remove_locked(*sa_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 2e946b2cad88..438c05254695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <drm/drmP.h>
#include "amdgpu.h"
+#include "amdgpu_trace.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
{
@@ -44,24 +45,20 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
return NULL;
}
job = to_amdgpu_job(sched_job);
- mutex_lock(&job->job_lock);
- r = amdgpu_ib_schedule(job->adev,
- job->num_ibs,
- job->ibs,
- job->base.owner);
+ trace_amdgpu_sched_run_job(job);
+ r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
}
- fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence);
+ fence = job->ibs[job->num_ibs - 1].fence;
+ fence_get(&fence->base);
err:
if (job->free_job)
job->free_job(job);
- mutex_unlock(&job->job_lock);
- fence_put(&job->base.s_fence->base);
kfree(job);
return fence ? &fence->base : NULL;
}
@@ -87,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
return -ENOMEM;
job->base.sched = &ring->sched;
job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+ job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+ if (!job->base.s_fence) {
+ kfree(job);
+ return -ENOMEM;
+ }
+ *f = fence_get(&job->base.s_fence->base);
+
job->adev = adev;
job->ibs = ibs;
job->num_ibs = num_ibs;
- job->base.owner = owner;
- mutex_init(&job->job_lock);
+ job->owner = owner;
job->free_job = free_job;
- mutex_lock(&job->job_lock);
- r = amd_sched_entity_push_job(&job->base);
- if (r) {
- mutex_unlock(&job->job_lock);
- kfree(job);
- return r;
- }
- *f = fence_get(&job->base.s_fence->base);
- mutex_unlock(&job->job_lock);
+ amd_sched_entity_push_job(&job->base);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52ec6fe..1caaf201b708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
if (*semaphore == NULL) {
return -ENOMEM;
}
- r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 4921de15b451..dd005c336c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -87,6 +87,15 @@ static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
return false;
}
+static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
+{
+ if (*keep && fence_is_later(*keep, fence))
+ return;
+
+ fence_put(*keep);
+ *keep = fence_get(fence);
+}
+
/**
* amdgpu_sync_fence - remember to sync to this fence
*
@@ -99,35 +108,21 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
{
struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence;
- struct amdgpu_fence *other;
- struct fence *tmp, *later;
if (!f)
return 0;
if (amdgpu_sync_same_dev(adev, f) &&
- amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
- if (sync->last_vm_update) {
- tmp = sync->last_vm_update;
- BUG_ON(f->context != tmp->context);
- later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
- sync->last_vm_update = fence_get(later);
- fence_put(tmp);
- } else
- sync->last_vm_update = fence_get(f);
- }
+ amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+ amdgpu_sync_keep_later(&sync->last_vm_update, f);
fence = to_amdgpu_fence(f);
if (!fence || fence->ring->adev != adev) {
hash_for_each_possible(sync->fences, e, node, f->context) {
- struct fence *new;
if (unlikely(e->fence->context != f->context))
continue;
- new = fence_get(fence_later(e->fence, f));
- if (new) {
- fence_put(e->fence);
- e->fence = new;
- }
+
+ amdgpu_sync_keep_later(&e->fence, f);
return 0;
}
@@ -140,10 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
- other = sync->sync_to[fence->ring->idx];
- sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
- amdgpu_fence_later(fence, other));
- amdgpu_fence_unref(&other);
+ amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
return 0;
}
@@ -199,8 +191,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
* for other VM updates and moves.
*/
fence_owner = amdgpu_sync_get_owner(f);
- if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
- (fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
+ if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
+ (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
(fence_owner == AMDGPU_FENCE_OWNER_VM)))
continue;
@@ -262,11 +254,11 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
return 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_fence *fence = sync->sync_to[i];
+ struct fence *fence = sync->sync_to[i];
if (!fence)
continue;
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r)
return r;
}
@@ -291,9 +283,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_fence *fence = sync->sync_to[i];
- struct amdgpu_semaphore *semaphore;
struct amdgpu_ring *other = adev->rings[i];
+ struct amdgpu_semaphore *semaphore;
+ struct amdgpu_fence *fence;
+
+ if (!sync->sync_to[i])
+ continue;
+
+ fence = to_amdgpu_fence(sync->sync_to[i]);
/* check if we really need to sync */
if (!amdgpu_fence_need_sync(fence, ring))
@@ -305,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL;
}
- if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
- (count >= AMDGPU_NUM_SYNCS)) {
+ if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
+ r = fence_wait(&fence->base, true);
+ if (r)
+ return r;
+ continue;
+ }
+
+ if (count >= AMDGPU_NUM_SYNCS) {
/* not enough room, wait manually */
r = fence_wait(&fence->base, false);
if (r)
@@ -378,7 +381,7 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- amdgpu_fence_unref(&sync->sync_to[i]);
+ fence_put(sync->sync_to[i]);
fence_put(sync->last_vm_update);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 76ecbaf72a2e..8f9834ab1bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
__entry->fences)
);
+TRACE_EVENT(amdgpu_cs_ioctl,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_device *, adev)
+ __field(struct amd_sched_job *, sched_job)
+ __field(struct amdgpu_ib *, ib)
+ __field(struct fence *, fence)
+ __field(char *, ring_name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->adev = job->adev;
+ __entry->sched_job = &job->base;
+ __entry->ib = job->ibs;
+ __entry->fence = &job->base.s_fence->base;
+ __entry->ring_name = job->ibs[0].ring->name;
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+ __entry->adev, __entry->sched_job, __entry->ib,
+ __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_device *, adev)
+ __field(struct amd_sched_job *, sched_job)
+ __field(struct amdgpu_ib *, ib)
+ __field(struct fence *, fence)
+ __field(char *, ring_name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->adev = job->adev;
+ __entry->sched_job = &job->base;
+ __entry->ib = job->ibs;
+ __entry->fence = &job->base.s_fence->base;
+ __entry->ring_name = job->ibs[0].ring->name;
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+ __entry->adev, __entry->sched_job, __entry->ib,
+ __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
TRACE_EVENT(amdgpu_vm_grab_id,
TP_PROTO(unsigned vmid, int ring),
TP_ARGS(vmid, ring),
@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
-DECLARE_EVENT_CLASS(amdgpu_fence_request,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno),
-
- TP_STRUCT__entry(
- __field(u32, dev)
- __field(int, ring)
- __field(u32, seqno)
- ),
-
- TP_fast_assign(
- __entry->dev = dev->primary->index;
- __entry->ring = ring;
- __entry->seqno = seqno;
- ),
-
- TP_printk("dev=%u, ring=%d, seqno=%u",
- __entry->dev, __entry->ring, __entry->seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a089e69e9927..8a1752ff3d8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -587,9 +587,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
int r;
- if (gtt->userptr)
- amdgpu_ttm_tt_pin_userptr(ttm);
-
+ if (gtt->userptr) {
+ r = amdgpu_ttm_tt_pin_userptr(ttm);
+ if (r) {
+ DRM_ERROR("failed to pin userptr\n");
+ return r;
+ }
+ }
gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
if (!ttm->num_pages) {
WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -797,11 +801,12 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
if (mem && mem->mem_type != TTM_PL_SYSTEM)
flags |= AMDGPU_PTE_VALID;
- if (mem && mem->mem_type == TTM_PL_TT)
+ if (mem && mem->mem_type == TTM_PL_TT) {
flags |= AMDGPU_PTE_SYSTEM;
- if (!ttm || ttm->caching_state == tt_cached)
- flags |= AMDGPU_PTE_SNOOPED;
+ if (ttm->caching_state == tt_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+ }
if (adev->asic_type >= CHIP_TOPAZ)
flags |= AMDGPU_PTE_EXECUTABLE;
@@ -1041,7 +1046,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
WARN_ON(ib->length_dw > num_dw);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vm_free_job,
- AMDGPU_FENCE_OWNER_MOVE,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
fence);
if (r)
goto error_free;
@@ -1073,10 +1078,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
ret = drm_mm_dump_table(m, mm);
spin_unlock(&glob->lru_lock);
if (ttm_pl == TTM_PL_VRAM)
- seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n",
+ seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
adev->mman.bdev.man[ttm_pl].size,
- atomic64_read(&adev->vram_usage) >> 20,
- atomic64_read(&adev->vram_vis_usage) >> 20);
+ (u64)atomic64_read(&adev->vram_usage) >> 20,
+ (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 03f0c3bae516..a745eeeb5d82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -392,7 +392,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = 0x00000030; /* len */
+ if ((ring->adev->vce.fw_version >> 24) >= 52)
+ ib->ptr[ib->length_dw++] = 0x00000040; /* len */
+ else
+ ib->ptr[ib->length_dw++] = 0x00000030; /* len */
ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000042;
@@ -404,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000100;
ib->ptr[ib->length_dw++] = 0x0000000c;
ib->ptr[ib->length_dw++] = 0x00000000;
+ if ((ring->adev->vce.fw_version >> 24) >= 52) {
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ }
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ff26e330ccd6..b53d273eb7a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -135,7 +135,7 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync)
{
- struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
+ struct fence *best[AMDGPU_MAX_RINGS] = {};
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
struct amdgpu_device *adev = ring->adev;
@@ -143,10 +143,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
unsigned i;
/* check if the id is still valid */
- if (vm_id->id && vm_id->last_id_use &&
- vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) {
- trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
- return 0;
+ if (vm_id->id) {
+ unsigned id = vm_id->id;
+ long owner;
+
+ owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
+ if (owner == (long)vm) {
+ trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+ return 0;
+ }
}
/* we definately need to flush */
@@ -154,7 +159,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < adev->vm_manager.nvm; ++i) {
- struct amdgpu_fence *fence = adev->vm_manager.active[i];
+ struct fence *fence = adev->vm_manager.ids[i].active;
+ struct amdgpu_ring *fring;
if (fence == NULL) {
/* found a free one */
@@ -163,21 +169,23 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
return 0;
}
- if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
- best[fence->ring->idx] = fence;
- choices[fence->ring == ring ? 0 : 1] = i;
+ fring = amdgpu_ring_from_fence(fence);
+ if (best[fring->idx] == NULL ||
+ fence_is_later(best[fring->idx], fence)) {
+ best[fring->idx] = fence;
+ choices[fring == ring ? 0 : 1] = i;
}
}
for (i = 0; i < 2; ++i) {
if (choices[i]) {
- struct amdgpu_fence *fence;
+ struct fence *fence;
- fence = adev->vm_manager.active[choices[i]];
+ fence = adev->vm_manager.ids[choices[i]].active;
vm_id->id = choices[i];
trace_amdgpu_vm_grab_id(choices[i], ring->idx);
- return amdgpu_sync_fence(ring->adev, sync, &fence->base);
+ return amdgpu_sync_fence(ring->adev, sync, fence);
}
}
@@ -204,24 +212,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
struct fence *flushed_updates = vm_id->flushed_updates;
- bool is_earlier = false;
+ bool is_later;
- if (flushed_updates && updates) {
- BUG_ON(flushed_updates->context != updates->context);
- is_earlier = (updates->seqno - flushed_updates->seqno <=
- INT_MAX) ? true : false;
- }
-
- if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
- is_earlier) {
+ if (!flushed_updates)
+ is_later = true;
+ else if (!updates)
+ is_later = false;
+ else
+ is_later = fence_is_later(updates, flushed_updates);
+ if (pd_addr != vm_id->pd_gpu_addr || is_later) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- if (is_earlier) {
+ if (is_later) {
vm_id->flushed_updates = fence_get(updates);
fence_put(flushed_updates);
}
- if (!flushed_updates)
- vm_id->flushed_updates = fence_get(updates);
vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
}
@@ -241,16 +246,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
*/
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
- unsigned ridx = fence->ring->idx;
- unsigned vm_id = vm->ids[ridx].id;
-
- amdgpu_fence_unref(&adev->vm_manager.active[vm_id]);
- adev->vm_manager.active[vm_id] = amdgpu_fence_ref(fence);
+ struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
+ unsigned vm_id = vm->ids[ring->idx].id;
- amdgpu_fence_unref(&vm->ids[ridx].last_id_use);
- vm->ids[ridx].last_id_use = amdgpu_fence_ref(fence);
+ fence_put(adev->vm_manager.ids[vm_id].active);
+ adev->vm_manager.ids[vm_id].active = fence_get(fence);
+ atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
}
/**
@@ -329,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
*
* @adev: amdgpu_device pointer
* @bo: bo to clear
+ *
+ * need to reserve bo first before calling it.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo)
@@ -340,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
uint64_t addr;
int r;
- r = amdgpu_bo_reserve(bo, false);
- if (r)
- return r;
-
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
- goto error_unreserve;
+ goto error;
addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
- goto error_unreserve;
+ goto error;
r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
if (r)
@@ -375,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (!r)
amdgpu_bo_fence(bo, fence, true);
fence_put(fence);
- if (amdgpu_enable_scheduler) {
- amdgpu_bo_unreserve(bo);
+ if (amdgpu_enable_scheduler)
return 0;
- }
+
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
-error_unreserve:
- amdgpu_bo_unreserve(bo);
+error:
return r;
}
@@ -886,17 +885,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
int r;
+ spin_lock(&vm->freed_lock);
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
-
+ spin_unlock(&vm->freed_lock);
r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
kfree(mapping);
if (r)
return r;
+ spin_lock(&vm->freed_lock);
}
+ spin_unlock(&vm->freed_lock);
+
return 0;
}
@@ -923,8 +926,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock);
-
+ mutex_lock(&bo_va->mutex);
r = amdgpu_vm_bo_update(adev, bo_va, NULL);
+ mutex_unlock(&bo_va->mutex);
if (r)
return r;
@@ -968,7 +972,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
-
+ mutex_init(&bo_va->mutex);
list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va;
@@ -986,7 +990,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
* Add a mapping of the BO at the specefied addr into the VM.
* Returns 0 for success, error for failure.
*
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved outside!
*/
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1002,30 +1006,27 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* validate the parameters */
if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
- size == 0 || size & AMDGPU_GPU_PAGE_MASK) {
- amdgpu_bo_unreserve(bo_va->bo);
+ size == 0 || size & AMDGPU_GPU_PAGE_MASK)
return -EINVAL;
- }
/* make sure object fit at this offset */
eaddr = saddr + size;
- if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) {
- amdgpu_bo_unreserve(bo_va->bo);
+ if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
return -EINVAL;
- }
last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
if (last_pfn > adev->vm_manager.max_pfn) {
dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
last_pfn, adev->vm_manager.max_pfn);
- amdgpu_bo_unreserve(bo_va->bo);
return -EINVAL;
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ spin_lock(&vm->it_lock);
it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
+ spin_unlock(&vm->it_lock);
if (it) {
struct amdgpu_bo_va_mapping *tmp;
tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1033,14 +1034,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
tmp->it.start, tmp->it.last + 1);
- amdgpu_bo_unreserve(bo_va->bo);
r = -EINVAL;
goto error;
}
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
if (!mapping) {
- amdgpu_bo_unreserve(bo_va->bo);
r = -ENOMEM;
goto error;
}
@@ -1051,8 +1050,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset;
mapping->flags = flags;
+ mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids);
+ mutex_unlock(&bo_va->mutex);
+ spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_map(bo_va, mapping);
/* Make sure the page tables are allocated */
@@ -1064,8 +1067,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
if (eaddr > vm->max_pde_used)
vm->max_pde_used = eaddr;
- amdgpu_bo_unreserve(bo_va->bo);
-
/* walk over the address space and allocate the page tables */
for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
struct reservation_object *resv = vm->page_directory->tbo.resv;
@@ -1074,16 +1075,19 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
if (vm->page_tables[pt_idx].bo)
continue;
- ww_mutex_lock(&resv->lock, NULL);
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
AMDGPU_GPU_PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
NULL, resv, &pt);
- ww_mutex_unlock(&resv->lock);
if (r)
goto error_free;
+ /* Keep a reference to the page table to avoid freeing
+ * them up in the wrong order.
+ */
+ pt->parent = amdgpu_bo_ref(vm->page_directory);
+
r = amdgpu_vm_clear_bo(adev, pt);
if (r) {
amdgpu_bo_unref(&pt);
@@ -1098,7 +1102,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
error_free:
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
kfree(mapping);
@@ -1116,7 +1122,7 @@ error:
* Remove a mapping of the BO at the specefied addr from the VM.
* Returns 0 for success, error for failure.
*
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved outside!
*/
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1127,7 +1133,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
-
+ mutex_lock(&bo_va->mutex);
list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr)
break;
@@ -1142,20 +1148,24 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
}
if (&mapping->list == &bo_va->invalids) {
- amdgpu_bo_unreserve(bo_va->bo);
+ mutex_unlock(&bo_va->mutex);
return -ENOENT;
}
}
-
+ mutex_unlock(&bo_va->mutex);
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- if (valid)
+ if (valid) {
+ spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
- else
+ spin_unlock(&vm->freed_lock);
+ } else {
kfree(mapping);
- amdgpu_bo_unreserve(bo_va->bo);
+ }
return 0;
}
@@ -1184,17 +1194,23 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
+ spin_unlock(&vm->freed_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
kfree(mapping);
}
-
fence_put(bo_va->last_pt_update);
+ mutex_destroy(&bo_va->mutex);
kfree(bo_va);
}
@@ -1238,15 +1254,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL;
- vm->ids[i].last_id_use = NULL;
}
- mutex_init(&vm->mutex);
vm->va = RB_ROOT;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
-
+ spin_lock_init(&vm->it_lock);
+ spin_lock_init(&vm->freed_lock);
pd_size = amdgpu_vm_directory_size(adev);
pd_entries = amdgpu_vm_num_pdes(adev);
@@ -1266,8 +1281,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
NULL, NULL, &vm->page_directory);
if (r)
return r;
-
+ r = amdgpu_bo_reserve(vm->page_directory, false);
+ if (r) {
+ amdgpu_bo_unref(&vm->page_directory);
+ vm->page_directory = NULL;
+ return r;
+ }
r = amdgpu_vm_clear_bo(adev, vm->page_directory);
+ amdgpu_bo_unreserve(vm->page_directory);
if (r) {
amdgpu_bo_unref(&vm->page_directory);
vm->page_directory = NULL;
@@ -1310,11 +1331,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
-
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ unsigned id = vm->ids[i].id;
+
+ atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
+ (long)vm, 0);
fence_put(vm->ids[i].flushed_updates);
- amdgpu_fence_unref(&vm->ids[i].last_id_use);
}
- mutex_destroy(&vm->mutex);
+}
+
+/**
+ * amdgpu_vm_manager_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ for (i = 0; i < AMDGPU_NUM_VM; ++i)
+ fence_put(adev->vm_manager.ids[i].active);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a1a35a5df8e7..57a2e347f04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+ cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+ cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
default:
@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+ cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+ cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index a6ea2d8e85df..4dcc8fba5792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -255,6 +255,24 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
}
+static void dce_v10_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
/**
* dce_v10_0_page_flip - pageflip callback.
*
@@ -262,46 +280,22 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
* @crtc_id: crtc to cleanup pageflip on
* @crtc_base: new address of the crtc (GPU MC address)
*
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
*/
static void dce_v10_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
- int i;
-
- /* Lock the graphics update lock */
- tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
-
- /* update the scanout addresses */
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
+ /* update the primary scanout address */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(crtc_base));
-
- /* Wait for update_pending to go high. */
- for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
- GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
- break;
- udelay(1);
- }
- DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
- /* Unlock the lock, so double-buffering can take place inside vblank */
- tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
@@ -1256,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1339,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1363,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
@@ -2700,9 +2697,10 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
dce_v10_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v10_0_vga_enable(crtc, false);
- /* Make sure VBLANK interrupt is still enabled */
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
dce_v10_0_crtc_load_lut(crtc);
break;
@@ -3063,6 +3061,8 @@ static int dce_v10_0_hw_init(void *handle)
dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v10_0_pageflip_interrupt_init(adev);
+
return 0;
}
@@ -3077,6 +3077,8 @@ static int dce_v10_0_hw_fini(void *handle)
dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v10_0_pageflip_interrupt_fini(adev);
+
return 0;
}
@@ -3364,7 +3366,6 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 784afb5978ac..8f1e51128b33 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -245,6 +245,24 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
}
+static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
/**
* dce_v11_0_page_flip - pageflip callback.
*
@@ -252,46 +270,22 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
* @crtc_id: crtc to cleanup pageflip on
* @crtc_base: new address of the crtc (GPU MC address)
*
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
*/
static void dce_v11_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
- int i;
-
- /* Lock the graphics update lock */
- tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
/* update the scanout addresses */
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
-
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(crtc_base));
-
- /* Wait for update_pending to go high. */
- for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
- GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
- break;
- udelay(1);
- }
- DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
- /* Unlock the lock, so double-buffering can take place inside vblank */
- tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
@@ -1244,7 +1238,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1327,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1351,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
@@ -2689,9 +2686,10 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
dce_v11_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v11_0_vga_enable(crtc, false);
- /* Make sure VBLANK interrupt is still enabled */
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
dce_v11_0_crtc_load_lut(crtc);
break;
@@ -3056,6 +3054,8 @@ static int dce_v11_0_hw_init(void *handle)
dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v11_0_pageflip_interrupt_init(adev);
+
return 0;
}
@@ -3070,6 +3070,8 @@ static int dce_v11_0_hw_fini(void *handle)
dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v11_0_pageflip_interrupt_fini(adev);
+
return 0;
}
@@ -3357,7 +3359,6 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 00c34f87ac20..42d954dc436d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -204,6 +204,24 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
}
+static void dce_v8_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
/**
* dce_v8_0_page_flip - pageflip callback.
*
@@ -211,46 +229,22 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
* @crtc_id: crtc to cleanup pageflip on
* @crtc_base: new address of the crtc (GPU MC address)
*
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
*/
static void dce_v8_0_page_flip(struct amdgpu_device *adev,
int crtc_id, u64 crtc_base)
{
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
- int i;
-
- /* Lock the graphics update lock */
- tmp |= GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
-
- /* update the scanout addresses */
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)crtc_base);
+ /* update the primary scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)crtc_base);
-
- /* Wait for update_pending to go high. */
- for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
- GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
- break;
- udelay(1);
- }
- DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
- /* Unlock the lock, so double-buffering can take place inside vblank */
- tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
- WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
@@ -1199,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1282,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1308,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
@@ -2612,9 +2609,10 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
dce_v8_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v8_0_vga_enable(crtc, false);
- /* Make sure VBLANK interrupt is still enabled */
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
dce_v8_0_crtc_load_lut(crtc);
break;
@@ -2971,6 +2969,8 @@ static int dce_v8_0_hw_init(void *handle)
dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v8_0_pageflip_interrupt_init(adev);
+
return 0;
}
@@ -2985,6 +2985,8 @@ static int dce_v8_0_hw_fini(void *handle)
dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
+ dce_v8_0_pageflip_interrupt_fini(adev);
+
return 0;
}
@@ -3373,7 +3375,6 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cbc46a34987c..e1dcab98e249 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -235,11 +235,13 @@ static const u32 fiji_golden_common_all[] =
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
- mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
- mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
static const u32 golden_settings_fiji_a10[] =
@@ -247,18 +249,19 @@ static const u32 golden_settings_fiji_a10[] =
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
- mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x00000100,
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
- mmTCC_CTRL, 0x00100000, 0xf30fff7f,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
- mmTCP_CHAN_STEER_HI, 0xffffffff, 0x7d6cf5e4,
- mmTCP_CHAN_STEER_LO, 0xffffffff, 0x3928b1a0,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 fiji_mgcg_cgcg_init[] =
{
- mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffc0,
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -992,7 +995,7 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.max_cu_per_sh = 16;
adev->gfx.config.max_sh_per_se = 1;
adev->gfx.config.max_backends_per_se = 4;
- adev->gfx.config.max_texture_channel_caches = 8;
+ adev->gfx.config.max_texture_channel_caches = 16;
adev->gfx.config.max_gprs = 256;
adev->gfx.config.max_gs_threads = 32;
adev->gfx.config.max_hw_contexts = 8;
@@ -1031,6 +1034,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
case 0x84:
case 0xc8:
case 0xcc:
+ case 0xe1:
+ case 0xe3:
/* B10 */
adev->gfx.config.max_cu_per_sh = 8;
break;
@@ -1039,18 +1044,23 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
case 0x85:
case 0xc9:
case 0xcd:
+ case 0xe2:
+ case 0xe4:
/* B8 */
adev->gfx.config.max_cu_per_sh = 6;
break;
case 0xc6:
case 0xca:
case 0xce:
+ case 0x88:
/* B6 */
adev->gfx.config.max_cu_per_sh = 6;
break;
case 0xc7:
case 0x87:
case 0xcb:
+ case 0xe5:
+ case 0x89:
default:
/* B4 */
adev->gfx.config.max_cu_per_sh = 4;
@@ -1598,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
}
case CHIP_FIJI:
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
+ switch (reg_offset) {
+ case 0:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 1:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 2:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 3:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 4:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 5:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 6:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 7:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 8:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+ break;
+ case 9:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 10:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 11:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 12:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 13:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 14:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 15:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 16:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 17:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 18:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 19:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 20:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 21:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 22:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 23:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 24:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 25:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 26:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 27:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 28:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 29:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 30:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ default:
+ gb_tile_moden = 0;
+ break;
+ }
+ adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
+ WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
+ }
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
+ switch (reg_offset) {
+ case 0:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 1:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 2:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 3:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 4:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 5:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 6:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 8:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 9:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 10:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 11:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 12:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 13:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 14:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ break;
+ case 7:
+ /* unused idx */
+ continue;
+ default:
+ gb_tile_moden = 0;
+ break;
+ }
+ adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
+ }
+ break;
case CHIP_TONGA:
for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
switch (reg_offset) {
@@ -2956,10 +3256,13 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
switch (adev->asic_type) {
case CHIP_TONGA:
- case CHIP_FIJI:
amdgpu_ring_write(ring, 0x16000012);
amdgpu_ring_write(ring, 0x0000002A);
break;
+ case CHIP_FIJI:
+ amdgpu_ring_write(ring, 0x3a00161a);
+ amdgpu_ring_write(ring, 0x0000002e);
+ break;
case CHIP_TOPAZ:
case CHIP_CARRIZO:
amdgpu_ring_write(ring, 0x00000002);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 488348272c4d..ed8abb58a785 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -40,7 +40,7 @@
static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/boniare_mc.bin");
+MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
/**
@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp);
tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
@@ -512,7 +513,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3, tmp);
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page.addr >> 12));
@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle)
static int gmc_v7_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- amdgpu_fence_unref(&adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle)
static int gmc_v7_0_suspend(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- amdgpu_fence_unref(&adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 72e977b1685d..d39028440814 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp);
tmp = RREG32(mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
@@ -656,7 +657,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL4, tmp);
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page.addr >> 12));
@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle)
static int gmc_v8_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- amdgpu_fence_unref(&adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle)
static int gmc_v8_0_suspend(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- amdgpu_fence_unref(&adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 9745ed3a9aef..7e9154c7f1db 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2997,6 +2997,9 @@ static int kv_dpm_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ if (!amdgpu_dpm)
+ return 0;
+
/* init the sysfs and debugfs files late */
ret = amdgpu_pm_sysfs_init(adev);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6a52db6ad8d7..370c6c9d81c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -40,6 +40,9 @@
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define VCE_V3_0_FW_SIZE (384 * 1024)
#define VCE_V3_0_STACK_SIZE (64 * 1024)
@@ -130,9 +133,11 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
/* set BUSY flag */
WREG32_P(mmVCE_STATUS, 1, ~1);
-
- WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
- ~VCE_VCPU_CNTL__CLK_EN_MASK);
+ if (adev->asic_type >= CHIP_STONEY)
+ WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
+ else
+ WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+ ~VCE_VCPU_CNTL__CLK_EN_MASK);
WREG32_P(mmVCE_SOFT_RESET,
VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
@@ -391,8 +396,12 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32(mmVCE_LMI_SWAP_CNTL, 0);
WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
WREG32(mmVCE_LMI_VM_CTRL, 0);
-
- WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+ if (adev->asic_type >= CHIP_STONEY) {
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
+ } else
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
size = VCE_V3_0_FW_SIZE;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
@@ -576,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: VCE\n");
+
+ WREG32_P(mmVCE_SYS_INT_STATUS,
+ VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
+ ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
+
switch (entry->src_data) {
case 0:
amdgpu_fence_process(&adev->vce.ring[0]);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index d8c93f14c3b5..2adc1c855e85 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1413,17 +1413,18 @@ static int vi_common_early_init(void *handle)
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = 0x1;
- if (amdgpu_smc_load_fw && smc_enabled)
- adev->firmware.smu_load = true;
break;
case CHIP_FIJI:
+ adev->has_uvd = true;
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
case CHIP_TONGA:
adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
- if (amdgpu_smc_load_fw && smc_enabled)
- adev->firmware.smu_load = true;
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -1432,14 +1433,15 @@ static int vi_common_early_init(void *handle)
/* Disable UVD pg */
adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
adev->external_rev_id = adev->rev_id + 0x1;
- if (amdgpu_smc_load_fw && smc_enabled)
- adev->firmware.smu_load = true;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
}
+ if (amdgpu_smc_load_fw && smc_enabled)
+ adev->firmware.smu_load = true;
+
return 0;
}