From 0b063bd3ea9c13df78c82aa742e581c39f9d6156 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Sat, 1 Apr 2017 10:53:02 +0800
Subject: drm/i915/gvt: clean up some too chatty scheduler messages

It's too chatty to have three places telling us which vgpu is
next to be scheduled. My log file bloated until it ate all the
disk space.

Cc: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/sched_policy.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index f84959170674..f459ec8b06a1 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -150,9 +150,6 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 		}
 	}
 
-	gvt_dbg_sched("switch to next vgpu %d\n",
-			scheduler->next_vgpu->id);
-
 	cur_time = ktime_get();
 	if (scheduler->current_vgpu) {
 		vgpu_data = scheduler->current_vgpu->sched_data;
@@ -224,17 +221,12 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
 		list_del_init(&vgpu_data->lru_list);
 		list_add_tail(&vgpu_data->lru_list,
 				&sched_data->lru_runq_head);
-
-		gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
 	} else {
 		scheduler->next_vgpu = gvt->idle_vgpu;
 	}
 out:
-	if (scheduler->next_vgpu) {
-		gvt_dbg_sched("try to schedule next vgpu %d\n",
-				scheduler->next_vgpu->id);
+	if (scheduler->next_vgpu)
 		try_to_schedule_next_vgpu(gvt);
-	}
 }
 
 void intel_gvt_schedule(struct intel_gvt *gvt)
-- 
cgit 


From e1236bc06c534a97f73e09aed5e1094108553e9f Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 6 Apr 2017 10:55:02 +0800
Subject: drm/i915/gvt: Align render mmio list to cacheline

Make the global mmio list be cacheline aligned to improve performance.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/render.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index e24e57afc45e..679411fe653f 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -44,7 +44,7 @@ struct render_mmio {
 	u32 value;
 };
 
-static struct render_mmio gen8_render_mmio_list[] = {
+static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = {
 	{RCS, _MMIO(0x229c), 0xffff, false},
 	{RCS, _MMIO(0x2248), 0x0, false},
 	{RCS, _MMIO(0x2098), 0x0, false},
@@ -75,7 +75,7 @@ static struct render_mmio gen8_render_mmio_list[] = {
 	{BCS, _MMIO(0x22028), 0x0, false},
 };
 
-static struct render_mmio gen9_render_mmio_list[] = {
+static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = {
 	{RCS, _MMIO(0x229c), 0xffff, false},
 	{RCS, _MMIO(0x2248), 0x0, false},
 	{RCS, _MMIO(0x2098), 0x0, false},
-- 
cgit 


From 80901ca879083ecb5fd08a8d3413220bec9612ac Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 6 Apr 2017 10:55:43 +0800
Subject: drm/i915/gvt: remove redundant platform check for mocs load/restore

The platform check is already done outside, so there is no need to check
again. Platforms that don't include mocs should not invoke these two
functions.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/render.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 679411fe653f..a7dae5e2c7cc 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -204,9 +204,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
-	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
-		return;
-
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
 		gen9_render_mocs[ring_id][i] = I915_READ(offset);
@@ -242,9 +239,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
-	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
-		return;
-
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
 		vgpu_vreg(vgpu, offset) = I915_READ(offset);
-- 
cgit 


From 43c29e1f449d596ed92f12cc19e41d9731ec3312 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 6 Apr 2017 10:55:55 +0800
Subject: drm/i915/gvt: remove redundant ring id check which causes significant
 CPU misprediction

Perf data shows a significant overhead at the ring id check in the
function get_opcode. This inline function is frequently used.

The Intel static predictor predicts that the branch falls through,
so the prediction mostly fails, which wastes CPU pipeline resources.
We do not need to check the engine id everywhere; it should be reliable.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 94f2e701e4d4..1abd153bec28 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -616,9 +616,6 @@ static inline u32 get_opcode(u32 cmd, int ring_id)
 {
 	struct decode_info *d_info;
 
-	if (ring_id >= I915_NUM_ENGINES)
-		return INVALID_OP;
-
 	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return INVALID_OP;
@@ -661,9 +658,6 @@ static inline void print_opcode(u32 cmd, int ring_id)
 	struct decode_info *d_info;
 	int i;
 
-	if (ring_id >= I915_NUM_ENGINES)
-		return;
-
 	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return;
-- 
cgit 


From fd3bd0a99cffffe476d54edd2eb13b52b1e9a27d Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 6 Apr 2017 10:56:03 +0800
Subject: drm/i915/gvt: use direct assignment for structure copying

Let the C compiler handle the structure copying. The compiler will use
a builtin function to handle that.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +-
 drivers/gpu/drm/i915/gvt/execlist.c   | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 1abd153bec28..41b2c3aaa04a 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2477,7 +2477,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	t1 = get_cycles();
 
-	memcpy(&s_before_advance_custom, s, sizeof(struct parser_exec_state));
+	s_before_advance_custom = *s;
 
 	if (info->handler) {
 		ret = info->handler(s);
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index ce4276a7cf9c..d077ed97970f 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -687,9 +687,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	}
 
 	if (emulate_schedule_in)
-		memcpy(&workload->elsp_dwords,
-				&vgpu->execlist[ring_id].elsp_dwords,
-				sizeof(workload->elsp_dwords));
+		workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords;
 
 	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
 			workload, ring_id, head, tail, start, ctl);
-- 
cgit 


From efa69d734adbf8a562d58d9fdc3429f2717764e7 Mon Sep 17 00:00:00 2001
From: Pei Zhang <pei.zhang@intel.com>
Date: Fri, 7 Apr 2017 16:50:16 +0800
Subject: drm/i915/gvt: add mmio init for virtual display

GVT implements a purely virtual monitor for the virtual GPU, independent of
the host. Some DDI-related MMIO registers are not initialized in the current
code, which causes display initialization to fail in the guest. This patch
fills the gap.

Signed-off-by: Pei Zhang <pei.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 4cf2b29fbaa1..e0261fcc5b50 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -189,17 +189,44 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
-		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_B << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
+		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_C << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_D << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
 	}
 
-- 
cgit 


From 954180aa69325feade02fa79f056fe1561f31fbb Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Wed, 12 Apr 2017 14:22:50 +0800
Subject: drm/i915/gvt: remove some debug messages in scheduler timer handler

As those debug messages might appear in every timer call for the scheduler,
they are too noisy, eat too much log space and aren't meaningful. So remove
them.

Cc: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/sched_policy.c | 7 +------
 drivers/gpu/drm/i915/gvt/scheduler.c    | 5 +----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index f459ec8b06a1..79ba4b3440aa 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -133,9 +133,6 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 	if (!scheduler->next_vgpu)
 		return;
 
-	gvt_dbg_sched("try to schedule next vgpu %d\n",
-			scheduler->next_vgpu->id);
-
 	/*
 	 * after the flag is set, workload dispatch thread will
 	 * stop dispatching workload for current vgpu
@@ -144,10 +141,8 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 
 	/* still have uncompleted workload? */
 	for_each_engine(engine, gvt->dev_priv, i) {
-		if (scheduler->current_workload[i]) {
-			gvt_dbg_sched("still have running workload\n");
+		if (scheduler->current_workload[i])
 			return;
-		}
 	}
 
 	cur_time = ktime_get();
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 375038252761..0b685dd26cb3 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -274,11 +274,8 @@ static struct intel_vgpu_workload *pick_next_workload(
 		goto out;
 	}
 
-	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) {
-		gvt_dbg_sched("ring id %d stop - no available workload\n",
-				ring_id);
+	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
 		goto out;
-	}
 
 	/*
 	 * still have current workload, maybe the workload disptacher
-- 
cgit 


From 5ad59bf0960b807f01cb6ef8c54f68b3476a1546 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Wed, 12 Apr 2017 16:24:57 +0800
Subject: drm/i915/gvt: Fix PTE write flush for taking runtime pm properly

Make sure to take runtime pm when flushing PTE writes, which ensures the
writes reach the hw properly. This fixes a warning during mdev/vgpu creation,
which does a ggtt reset.

------------[ cut here ]------------
WARNING: CPU: 1 PID: 9375 at drivers/gpu/drm/i915/intel_drv.h:1748 fwtable_write32+0x1c2/0x1e0 [i915]
 RPM wakelock ref not held during HW access
Call Trace:
  ? dump_stack+0x5c/0x81
  ? __warn+0xbe/0xe0
  ? warn_slowpath_fmt+0x5a/0x80
  ? wake_up_klogd+0x37/0x40
  ? vprintk_emit+0x2ef/0x370
  ? fwtable_write32+0x1c2/0x1e0 [i915]
  ? gtt_set_entry64+0xbb/0xd0 [i915]
  ? intel_vgpu_reset_ggtt+0x88/0xf0 [i915]
  ? intel_vgpu_init_gtt+0xa5/0x4f0 [i915]
  ? intel_gvt_create_vgpu+0x1b5/0x250 [i915]
  ? kobject_put+0x1b/0x50
  ? intel_vgpu_create+0x4e/0x130 [kvmgt]
  ? mdev_device_create+0x186/0x2a0 [mdev]
  ? create_store+0xba/0xe0 [mdev]
  ? create_store+0xba/0xe0 [mdev]
  ? kernfs_fop_write+0x109/0x1a0
  ? kernfs_fop_write+0x109/0x1a0
  ? __vfs_write+0x33/0x160
  ? __fput+0x161/0x1d0
  ? vfs_write+0xb0/0x190
  ? SyS_write+0x52/0xc0
  ? exit_to_usermode_loop+0x7a/0xa0
  ? entry_SYSCALL_64_fastpath+0x1e/0xad

v2: remove unrelated oops info

v3: take runtime pm once around the ggtt reset instead of doing a get/put
    for each pte write flush

Fixes: d650ac060237 ("drm/i915/gvt: reset the GGTT entry when vGPU created")
Cc: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 69d3d8ddecc2..acbe3f2ad6fc 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2290,12 +2290,15 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt)
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	u32 index;
 	u32 offset;
 	u32 num_entries;
 	struct intel_gvt_gtt_entry e;
 
+	intel_runtime_pm_get(dev_priv);
+
 	memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
 	e.type = GTT_TYPE_GGTT_PTE;
 	ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
@@ -2310,6 +2313,8 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
 	for (offset = 0; offset < num_entries; offset++)
 		ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
+
+	intel_runtime_pm_put(dev_priv);
 }
 
 /**
-- 
cgit 


From c821ee6d2bb4cfc9991bf285f53103cde9d3593a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Apr 2017 22:48:28 +0300
Subject: drm/i915/gvt: fix a bounds check in ring_id_to_context_switch_event()

There are two bugs here.  The && should be || and the > is off by one so
it should be >= ARRAY_SIZE().

Fixes: 8453d674ae7e ("drm/i915/gvt: vGPU execlist virtualization")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index d077ed97970f..dc9aef3e92d4 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -56,8 +56,8 @@ static int context_switch_events[] = {
 
 static int ring_id_to_context_switch_event(int ring_id)
 {
-	if (WARN_ON(ring_id < RCS && ring_id >
-				ARRAY_SIZE(context_switch_events)))
+	if (WARN_ON(ring_id < RCS ||
+		    ring_id >= ARRAY_SIZE(context_switch_events)))
 		return -EINVAL;
 
 	return context_switch_events[ring_id];
-- 
cgit