aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c2
11 files changed, 60 insertions, 37 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 08478fce00f2..2430d6223c2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -350,6 +350,7 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
* amdgpu_gmc_filter_faults - filter VM faults
*
* @adev: amdgpu device structure
+ * @ih: interrupt ring that the fault received from
* @addr: address of the VM fault
* @pasid: PASID of the process causing the fault
* @timestamp: timestamp of the fault
@@ -358,7 +359,8 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
* True if the fault was filtered and should not be processed further.
* False if the fault is a new one and needs to be handled.
*/
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
@@ -366,6 +368,10 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
struct amdgpu_gmc_fault *fault;
uint32_t hash;
+ /* Stale retry fault if timestamp goes backward */
+ if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
+ return true;
+
/* If we don't have space left in the ring buffer return immediately */
stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
AMDGPU_GMC_FAULT_TIMEOUT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e55201134a01..8458cebc6d5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -316,7 +316,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
-bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 0c7963dfacad..8050f7ba93ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -164,52 +164,32 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
}
}
-/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
-static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
- struct amdgpu_ih_ring *ih,
- uint32_t checkpoint_wptr,
- uint32_t *prev_rptr)
-{
- uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
-
- /* rptr has wrapped. */
- if (cur_rptr < *prev_rptr)
- cur_rptr += ih->ptr_mask + 1;
- *prev_rptr = cur_rptr;
-
- /* check ring is empty to workaround missing wptr overflow flag */
- return cur_rptr >= checkpoint_wptr ||
- (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
-}
-
/**
- * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
+ * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
*
* @adev: amdgpu_device pointer
* @ih: ih ring to process
*
* Used to ensure ring has processed IVs up to the checkpoint write pointer.
*/
-int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
- uint32_t checkpoint_wptr, rptr;
+ uint32_t checkpoint_wptr;
+ uint64_t checkpoint_ts;
+ long timeout = HZ;
if (!ih->enabled || adev->shutdown)
return -ENODEV;
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
- /* Order wptr with rptr. */
+ /* Order wptr with ring data. */
rmb();
- rptr = READ_ONCE(ih->rptr);
-
- /* wptr has wrapped. */
- if (rptr > checkpoint_wptr)
- checkpoint_wptr += ih->ptr_mask + 1;
+ checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
- return wait_event_interruptible(ih->wait_process,
- amdgpu_ih_has_checkpoint_processed(adev, ih,
- checkpoint_wptr, &rptr));
+ return wait_event_interruptible_timeout(ih->wait_process,
+ !amdgpu_ih_ts_after(ih->processed_timestamp, checkpoint_ts),
+ timeout);
}
/**
@@ -299,3 +279,18 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
/* wptr/rptr are in bytes! */
ih->rptr += 32;
}
+
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset)
+{
+ uint32_t iv_size = 32;
+ uint32_t ring_index;
+ uint32_t dw1, dw2;
+
+ rptr += iv_size * offset;
+ ring_index = (rptr & ih->ptr_mask) >> 2;
+
+ dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
+ return dw1 | ((u64)(dw2 & 0xffff) << 32);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 0649b59830a5..dd1c2eded6b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -68,20 +68,30 @@ struct amdgpu_ih_ring {
/* For waiting on IH processing at checkpoint. */
wait_queue_head_t wait_process;
+ uint64_t processed_timestamp;
};
+/* return true if time stamp t2 is after t1 with 48bit wrap around */
+#define amdgpu_ih_ts_after(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+ uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
};
#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
#define amdgpu_ih_decode_iv(adev, iv) \
(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
+#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
+ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
+ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
@@ -89,10 +99,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
unsigned int num_dw);
-int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
- struct amdgpu_ih_ring *ih);
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 4f3c62adccbd..3907fc726ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -528,6 +528,12 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
+
+ dev_WARN_ONCE(adev->dev, ih->processed_timestamp == entry.timestamp,
+ "IH timestamps are not unique");
+
+ if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp))
+ ih->processed_timestamp = entry.timestamp;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3ec5ff5a6dbe..d696c4754bea 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -107,7 +107,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Process it onyl if it's the first fault for this address */
if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
return 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index cb82404df534..7490ce8295c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -523,7 +523,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
/* Process it onyl if it's the first fault for this address */
if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp))
return 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 38241cf0e1f1..8ce5b8ca1fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -716,6 +716,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
static const struct amdgpu_ih_funcs navi10_ih_funcs = {
.get_wptr = navi10_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = navi10_ih_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a9ca6988009e..3070466f54e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = {
static const struct amdgpu_ih_funcs vega10_ih_funcs = {
.get_wptr = vega10_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = vega10_ih_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index f51dfc38ac65..3b4eb8285943 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = {
static const struct amdgpu_ih_funcs vega20_ih_funcs = {
.get_wptr = vega20_ih_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = vega20_ih_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 10868d5b549f..663489ae56d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1974,7 +1974,7 @@ restart:
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
- amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev,
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
&pdd->dev->adev->irq.ih1);
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}