diff options
Diffstat (limited to 'drivers/misc')
| -rw-r--r-- | drivers/misc/atmel-ssc.c | 24 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/command_submission.c | 27 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/debugfs.c | 27 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/device.c | 2 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/firmware_if.c | 10 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 160 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudiP.h | 3 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 20 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 19 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 2 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/hwmon.c | 19 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/include/gaudi/gaudi_packets.h | 3 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/sysfs.c | 11 | ||||
| -rw-r--r-- | drivers/misc/kgdbts.c | 6 | ||||
| -rw-r--r-- | drivers/misc/mei/bus.c | 3 | ||||
| -rw-r--r-- | drivers/misc/mei/hw-me-regs.h | 3 | ||||
| -rw-r--r-- | drivers/misc/mei/hw-me.c | 70 | ||||
| -rw-r--r-- | drivers/misc/mei/hw-me.h | 17 | ||||
| -rw-r--r-- | drivers/misc/mei/pci-me.c | 17 | 
19 files changed, 309 insertions, 134 deletions
| diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index ab4144ea1f11..d6cd5537126c 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -10,7 +10,7 @@  #include <linux/clk.h>  #include <linux/err.h>  #include <linux/io.h> -#include <linux/spinlock.h> +#include <linux/mutex.h>  #include <linux/atmel-ssc.h>  #include <linux/slab.h>  #include <linux/module.h> @@ -20,7 +20,7 @@  #include "../../sound/soc/atmel/atmel_ssc_dai.h"  /* Serialize access to ssc_list and user count */ -static DEFINE_SPINLOCK(user_lock); +static DEFINE_MUTEX(user_lock);  static LIST_HEAD(ssc_list);  struct ssc_device *ssc_request(unsigned int ssc_num) @@ -28,7 +28,7 @@ struct ssc_device *ssc_request(unsigned int ssc_num)  	int ssc_valid = 0;  	struct ssc_device *ssc; -	spin_lock(&user_lock); +	mutex_lock(&user_lock);  	list_for_each_entry(ssc, &ssc_list, list) {  		if (ssc->pdev->dev.of_node) {  			if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc") @@ -44,18 +44,18 @@ struct ssc_device *ssc_request(unsigned int ssc_num)  	}  	if (!ssc_valid) { -		spin_unlock(&user_lock); +		mutex_unlock(&user_lock);  		pr_err("ssc: ssc%d platform device is missing\n", ssc_num);  		return ERR_PTR(-ENODEV);  	}  	if (ssc->user) { -		spin_unlock(&user_lock); +		mutex_unlock(&user_lock);  		dev_dbg(&ssc->pdev->dev, "module busy\n");  		return ERR_PTR(-EBUSY);  	}  	ssc->user++; -	spin_unlock(&user_lock); +	mutex_unlock(&user_lock);  	clk_prepare(ssc->clk); @@ -67,14 +67,14 @@ void ssc_free(struct ssc_device *ssc)  {  	bool disable_clk = true; -	spin_lock(&user_lock); +	mutex_lock(&user_lock);  	if (ssc->user)  		ssc->user--;  	else {  		disable_clk = false;  		dev_dbg(&ssc->pdev->dev, "device already free\n");  	} -	spin_unlock(&user_lock); +	mutex_unlock(&user_lock);  	if (disable_clk)  		clk_unprepare(ssc->clk); @@ -237,9 +237,9 @@ static int ssc_probe(struct platform_device *pdev)  		return -ENXIO;  	} -	spin_lock(&user_lock); +	mutex_lock(&user_lock);  	
list_add_tail(&ssc->list, &ssc_list); -	spin_unlock(&user_lock); +	mutex_unlock(&user_lock);  	platform_set_drvdata(pdev, ssc); @@ -258,9 +258,9 @@ static int ssc_remove(struct platform_device *pdev)  	ssc_sound_dai_remove(ssc); -	spin_lock(&user_lock); +	mutex_lock(&user_lock);  	list_del(&ssc->list); -	spin_unlock(&user_lock); +	mutex_unlock(&user_lock);  	return 0;  } diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index f82974a916c3..f3a8f113865d 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -62,6 +62,12 @@ static void hl_fence_release(struct dma_fence *fence)  		container_of(fence, struct hl_cs_compl, base_fence);  	struct hl_device *hdev = hl_cs_cmpl->hdev; +	/* EBUSY means the CS was never submitted and hence we don't have +	 * an attached hw_sob object that we should handle here +	 */ +	if (fence->error == -EBUSY) +		goto free; +  	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||  			(hl_cs_cmpl->type == CS_TYPE_WAIT)) { @@ -92,6 +98,7 @@ static void hl_fence_release(struct dma_fence *fence)  		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);  	} +free:  	kfree_rcu(hl_cs_cmpl, base_fence.rcu);  } @@ -328,10 +335,16 @@ static void cs_do_release(struct kref *ref)  	hl_ctx_put(cs->ctx); +	/* We need to mark an error for not submitted because in that case +	 * the dma fence release flow is different. 
Mainly, we don't need +	 * to handle hw_sob for signal/wait +	 */  	if (cs->timedout)  		dma_fence_set_error(cs->fence, -ETIMEDOUT);  	else if (cs->aborted)  		dma_fence_set_error(cs->fence, -EIO); +	else if (!cs->submitted) +		dma_fence_set_error(cs->fence, -EBUSY);  	dma_fence_signal(cs->fence);  	dma_fence_put(cs->fence); @@ -486,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev,  	struct asic_fixed_properties *asic = &hdev->asic_prop;  	struct hw_queue_properties *hw_queue_prop; +	/* This must be checked here to prevent out-of-bounds access to +	 * hw_queues_props array +	 */ +	if (chunk->queue_index >= HL_MAX_QUEUES) { +		dev_err(hdev->dev, "Queue index %d is invalid\n", +			chunk->queue_index); +		return -EINVAL; +	} +  	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; -	if ((chunk->queue_index >= HL_MAX_QUEUES) || -			(hw_queue_prop->type == QUEUE_TYPE_NA)) { -		dev_err(hdev->dev, "Queue index %d is invalid\n", +	if (hw_queue_prop->type == QUEUE_TYPE_NA) { +		dev_err(hdev->dev, "Queue index %d is not applicable\n",  			chunk->queue_index);  		return -EINVAL;  	} diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 3c8dcdfba20c..0bc036e01ee8 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,  	pkt.i2c_reg = i2c_reg;  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					HL_DEVICE_TIMEOUT_USEC, (long *) val); +						0, (long *) val);  	if (rc)  		dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,  	pkt.value = cpu_to_le64(val);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					HL_DEVICE_TIMEOUT_USEC, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); 
@@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)  	pkt.value = cpu_to_le64(state);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						HL_DEVICE_TIMEOUT_USEC, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); @@ -480,7 +480,7 @@ out:  	return 0;  } -static ssize_t mmu_write(struct file *file, const char __user *buf, +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,  		size_t count, loff_t *f_pos)  {  	struct seq_file *s = file->private_data; @@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,  	if (*ppos)  		return 0; -	sprintf(tmp_buf, "%d\n", hdev->clock_gating); +	sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);  	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,  			strlen(tmp_buf) + 1); @@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,  {  	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;  	struct hl_device *hdev = entry->hdev; -	u32 value; +	u64 value;  	ssize_t rc;  	if (atomic_read(&hdev->in_reset)) { @@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,  		return 0;  	} -	rc = kstrtouint_from_user(buf, count, 10, &value); +	rc = kstrtoull_from_user(buf, count, 16, &value);  	if (rc)  		return rc; -	if (value) { -		hdev->clock_gating = 1; -		if (hdev->asic_funcs->enable_clock_gating) -			hdev->asic_funcs->enable_clock_gating(hdev); -	} else { -		if (hdev->asic_funcs->disable_clock_gating) -			hdev->asic_funcs->disable_clock_gating(hdev); -		hdev->clock_gating = 0; -	} +	hdev->clock_gating_mask = value; +	hdev->asic_funcs->set_clock_gating(hdev);  	return count;  } @@ -1125,7 +1118,7 @@ static const struct hl_info_list hl_debugfs_list[] = {  	{"command_submission_jobs", command_submission_jobs_show, NULL},  	{"userptr", userptr_show, NULL},  	{"vm", 
vm_show, NULL}, -	{"mmu", mmu_show, mmu_write}, +	{"mmu", mmu_show, mmu_asid_va_write},  	{"engines", engines_show, NULL}  }; diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2b38a119704c..59608d1bac88 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)  		hdev->in_debug = 0;  		if (!hdev->hard_reset_pending) -			hdev->asic_funcs->enable_clock_gating(hdev); +			hdev->asic_funcs->set_clock_gating(hdev);  		goto out;  	} diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index baf790cf4b78..d27841cb5bcb 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)  	pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);  	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, -				sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); +						sizeof(pkt), 0, NULL);  }  int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, @@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)  	pkt.value = cpu_to_le64(event_type);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -			HL_DEVICE_TIMEOUT_USEC, &result); +						0, &result);  	if (rc)  		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,  						ARMCP_PKT_CTL_OPCODE_SHIFT);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, -			total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); +						total_pkt_size, 0, &result);  	if (rc)  		dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)  	test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);  	rc = 
hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, -			sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); +						sizeof(test_pkt), 0, &result);  	if (!rc) {  		if (result != ARMCP_PACKET_FENCE_VAL) @@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)  	hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, -			sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); +						sizeof(hb_pkt), 0, &result);  	if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))  		rc = -EIO; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 61f88e9884ce..637a9d608707 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -80,6 +80,7 @@  #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)  #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)  #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */ +#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */  #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9 @@ -96,7 +97,12 @@  #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3 -#define GAUDI_ARB_WDT_TIMEOUT		0x400000 +#define GAUDI_ARB_WDT_TIMEOUT		0x1000000 + +#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\ +		BIT(GAUDI_ENGINE_ID_MME_0) |\ +		BIT(GAUDI_ENGINE_ID_MME_2) |\ +		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))  static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {  		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", @@ -106,14 +112,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {  };  static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { -	[GAUDI_PCI_DMA_1] = 0, -	[GAUDI_PCI_DMA_2] = 1, -	[GAUDI_PCI_DMA_3] = 5, -	[GAUDI_HBM_DMA_1] = 2, -	[GAUDI_HBM_DMA_2] = 3, -	[GAUDI_HBM_DMA_3] = 4, -	[GAUDI_HBM_DMA_4] = 6, -	[GAUDI_HBM_DMA_5] = 7 +	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, +	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, +	
[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, +	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, +	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, +	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, +	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, +	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7  };  static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { @@ -1819,7 +1825,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)  	gaudi_init_rate_limiter(hdev); -	gaudi_disable_clock_gating(hdev); +	hdev->asic_funcs->disable_clock_gating(hdev);  	for (tpc_id = 0, tpc_offset = 0;  				tpc_id < TPC_NUMBER_OF_ENGINES; @@ -1893,6 +1899,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,  	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);  	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); +	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); +  	/* The following configuration is needed only once per QMAN */  	if (qman_id == 0) {  		/* Configure RAZWI IRQ */ @@ -2529,46 +2537,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)  	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);  } -static void gaudi_enable_clock_gating(struct hl_device *hdev) +static void gaudi_set_clock_gating(struct hl_device *hdev)  {  	struct gaudi_device *gaudi = hdev->asic_specific;  	u32 qman_offset;  	int i; -	if (!hdev->clock_gating) -		return; - -	if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) -		return; -  	/* In case we are during debug session, don't enable the clock gate  	 * as it may interfere  	 */  	if (hdev->in_debug)  		return; -	for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { +	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { +		if (!(hdev->clock_gating_mask & +					(BIT_ULL(gaudi_dma_assignment[i])))) +			continue; +  		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;  		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);  		WREG32(mmDMA0_QM_CGM_CFG + 
qman_offset,  				QMAN_UPPER_CP_CGM_PWR_GATE_EN);  	} -	for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { +	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { +		if (!(hdev->clock_gating_mask & +					(BIT_ULL(gaudi_dma_assignment[i])))) +			continue; +  		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;  		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);  		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,  				QMAN_COMMON_CP_CGM_PWR_GATE_EN);  	} -	WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); -	WREG32(mmMME0_QM_CGM_CFG, -			QMAN_COMMON_CP_CGM_PWR_GATE_EN); -	WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); -	WREG32(mmMME2_QM_CGM_CFG, -			QMAN_COMMON_CP_CGM_PWR_GATE_EN); +	if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { +		WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); +		WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); +	} + +	if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { +		WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); +		WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); +	}  	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { +		if (!(hdev->clock_gating_mask & +					(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) +			continue; +  		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,  				QMAN_CGM1_PWR_GATE_EN);  		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, @@ -2661,7 +2678,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)  	gaudi_stop_hbm_dma_qmans(hdev);  	gaudi_stop_pci_dma_qmans(hdev); -	gaudi_disable_clock_gating(hdev); +	hdev->asic_funcs->disable_clock_gating(hdev);  	msleep(wait_timeout_ms); @@ -2725,6 +2742,12 @@ static int gaudi_mmu_init(struct hl_device *hdev)  	WREG32(mmSTLB_HOP_CONFIGURATION,  			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440); +	/* +	 * The H/W expects the first PI after init to be 1. After wraparound +	 * we'll write 0. 
+	 */ +	gaudi->mmu_cache_inv_pi = 1; +  	gaudi->hw_cap_initialized |= HW_CAP_MMU;  	return 0; @@ -2995,7 +3018,7 @@ static int gaudi_hw_init(struct hl_device *hdev)  	gaudi_init_tpc_qmans(hdev); -	gaudi_enable_clock_gating(hdev); +	hdev->asic_funcs->set_clock_gating(hdev);  	gaudi_enable_timestamp(hdev); @@ -3104,7 +3127,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)  					HW_CAP_HBM_DMA | HW_CAP_PLL |  					HW_CAP_MMU |  					HW_CAP_SRAM_SCRAMBLER | -					HW_CAP_HBM_SCRAMBLER); +					HW_CAP_HBM_SCRAMBLER | +					HW_CAP_CLK_GATE); +  	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));  } @@ -3455,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,  		return 0;  	} +	if (!timeout) +		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; +  	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,  						timeout, result);  } @@ -3790,6 +3818,25 @@ static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,  						src_in_host);  } +static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, +					struct hl_cs_parser *parser, +					struct packet_load_and_exe *user_pkt) +{ +	u32 cfg; + +	cfg = le32_to_cpu(user_pkt->cfg); + +	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { +		dev_err(hdev->dev, +			"User not allowed to use Load and Execute\n"); +		return -EPERM; +	} + +	parser->patched_cb_size += sizeof(struct packet_load_and_exe); + +	return 0; +} +  static int gaudi_validate_cb(struct hl_device *hdev,  			struct hl_cs_parser *parser, bool is_mmu)  { @@ -3838,6 +3885,17 @@ static int gaudi_validate_cb(struct hl_device *hdev,  			rc = -EPERM;  			break; +		case PACKET_WREG_BULK: +			dev_err(hdev->dev, +				"User not allowed to use WREG_BULK\n"); +			rc = -EPERM; +			break; + +		case PACKET_LOAD_AND_EXE: +			rc = gaudi_validate_load_and_exe_pkt(hdev, parser, +				(struct packet_load_and_exe *) user_pkt); +			break; +  		case PACKET_LIN_DMA:  			parser->contains_dma_pkt = true;  			if (is_mmu) @@ 
-3848,14 +3906,12 @@ static int gaudi_validate_cb(struct hl_device *hdev,  			break;  		case PACKET_WREG_32: -		case PACKET_WREG_BULK:  		case PACKET_MSG_LONG:  		case PACKET_MSG_SHORT:  		case PACKET_REPEAT:  		case PACKET_FENCE:  		case PACKET_NOP:  		case PACKET_ARB_POINT: -		case PACKET_LOAD_AND_EXE:  			parser->patched_cb_size += pkt_size;  			break; @@ -4490,13 +4546,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)  	int rc = 0;  	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { -		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + +		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && +				(hdev->clock_gating_mask & +						GAUDI_CLK_GATE_DEBUGFS_MASK)) { +  			dev_err_ratelimited(hdev->dev,  				"Can't read register - clock gating is enabled!\n");  			rc = -EFAULT;  		} else {  			*val = RREG32(addr - CFG_BASE);  		} +  	} else if ((addr >= SRAM_BASE_ADDR) &&  			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {  		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4532,13 +4593,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)  	int rc = 0;  	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { -		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + +		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && +				(hdev->clock_gating_mask & +						GAUDI_CLK_GATE_DEBUGFS_MASK)) { +  			dev_err_ratelimited(hdev->dev,  				"Can't write register - clock gating is enabled!\n");  			rc = -EFAULT;  		} else {  			WREG32(addr - CFG_BASE, val);  		} +  	} else if ((addr >= SRAM_BASE_ADDR) &&  			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {  		writel(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4574,7 +4640,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)  	int rc = 0;  	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { -		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + +		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && +				
(hdev->clock_gating_mask & +						GAUDI_CLK_GATE_DEBUGFS_MASK)) { +  			dev_err_ratelimited(hdev->dev,  				"Can't read register - clock gating is enabled!\n");  			rc = -EFAULT; @@ -4584,6 +4654,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)  			*val = (((u64) val_h) << 32) | val_l;  		} +  	} else if ((addr >= SRAM_BASE_ADDR) &&  		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {  		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4620,7 +4691,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)  	int rc = 0;  	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { -		if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + +		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && +				(hdev->clock_gating_mask & +						GAUDI_CLK_GATE_DEBUGFS_MASK)) { +  			dev_err_ratelimited(hdev->dev,  				"Can't write register - clock gating is enabled!\n");  			rc = -EFAULT; @@ -4629,6 +4704,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)  			WREG32(addr + sizeof(u32) - CFG_BASE,  				upper_32_bits(val));  		} +  	} else if ((addr >= SRAM_BASE_ADDR) &&  		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {  		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4850,7 +4926,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)  	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);  	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); -	hdev->asic_funcs->enable_clock_gating(hdev); +	hdev->asic_funcs->set_clock_gating(hdev);  	mutex_unlock(&gaudi->clk_gate_mutex);  } @@ -5231,7 +5307,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev,  	}  	if (disable_clock_gating) { -		hdev->asic_funcs->enable_clock_gating(hdev); +		hdev->asic_funcs->set_clock_gating(hdev);  		mutex_unlock(&gaudi->clk_gate_mutex);  	}  } @@ -5718,7 +5794,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 
tpc_id,  	/* Clear interrupts */  	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); -	hdev->asic_funcs->enable_clock_gating(hdev); +	hdev->asic_funcs->set_clock_gating(hdev);  	mutex_unlock(&gaudi->clk_gate_mutex); @@ -5994,6 +6070,8 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,  	mutex_lock(&hdev->mmu_cache_lock);  	/* L0 & L1 invalidation */ +	WREG32(mmSTLB_INV_PS, 3); +	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);  	WREG32(mmSTLB_INV_PS, 2);  	rc = hl_poll_timeout( @@ -6232,7 +6310,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,  	if (s)  		seq_puts(s, "\n"); -	hdev->asic_funcs->enable_clock_gating(hdev); +	hdev->asic_funcs->set_clock_gating(hdev);  	mutex_unlock(&gaudi->clk_gate_mutex); @@ -6333,7 +6411,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,  		dev_err(hdev->dev,  			"Timeout while waiting for TPC%d icache prefetch\n",  			tpc_id); -		hdev->asic_funcs->enable_clock_gating(hdev); +		hdev->asic_funcs->set_clock_gating(hdev);  		mutex_unlock(&gaudi->clk_gate_mutex);  		return -EIO;  	} @@ -6362,7 +6440,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,  		1000,  		kernel_timeout); -	hdev->asic_funcs->enable_clock_gating(hdev); +	hdev->asic_funcs->set_clock_gating(hdev);  	mutex_unlock(&gaudi->clk_gate_mutex);  	if (rc) { @@ -6703,7 +6781,7 @@ static const struct hl_asic_funcs gaudi_funcs = {  	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,  	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,  	.send_heartbeat = gaudi_send_heartbeat, -	.enable_clock_gating = gaudi_enable_clock_gating, +	.set_clock_gating = gaudi_set_clock_gating,  	.disable_clock_gating = gaudi_disable_clock_gating,  	.debug_coresight = gaudi_debug_coresight,  	.is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index a46530d375fa..41a8d9bff6bf 100644 --- 
a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -229,6 +229,8 @@ struct gaudi_internal_qman_info {   * @multi_msi_mode: whether we are working in multi MSI single MSI mode.   *                  Multi MSI is possible only with IOMMU enabled.   * @ext_queue_idx: helper index for external queues initialization. + * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an + *                    8-bit value so use u8.   */  struct gaudi_device {  	int (*armcp_info_get)(struct hl_device *hdev); @@ -248,6 +250,7 @@ struct gaudi_device {  	u32				hw_cap_initialized;  	u8				multi_msi_mode;  	u8				ext_queue_idx; +	u8				mmu_cache_inv_pi;  };  void gaudi_init_security(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58df..88460b2138d8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -88,6 +88,7 @@  #define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)  #define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)  #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */  #define GOYA_QMAN0_FENCE_VAL		0xD169B243 @@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,  		return 0;  	} +	if (!timeout) +		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; +  	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,  					timeout, result);  } @@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,  	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<  						ARMCP_PKT_CTL_OPCODE_SHIFT); -	rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, -			HL_DEVICE_TIMEOUT_USEC, &result); +	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, +						total_pkt_size,	0, &result);  	if (rc)  		dev_err(hdev->dev, "failed to unmask IRQ 
array\n"); @@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)  				ARMCP_PKT_CTL_OPCODE_SHIFT);  	pkt.value = cpu_to_le64(event_type); -	rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -			HL_DEVICE_TIMEOUT_USEC, &result); +	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), +						0, &result);  	if (rc)  		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -5028,14 +5032,14 @@ int goya_armcp_info_get(struct hl_device *hdev)  	return 0;  } -static void goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev)  { - +	/* clock gating not supported in Goya */  }  static void goya_disable_clock_gating(struct hl_device *hdev)  { - +	/* clock gating not supported in Goya */  }  static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5259,7 +5263,7 @@ static const struct hl_asic_funcs goya_funcs = {  	.mmu_invalidate_cache = goya_mmu_invalidate_cache,  	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,  	.send_heartbeat = goya_send_heartbeat, -	.enable_clock_gating = goya_enable_clock_gating, +	.set_clock_gating = goya_set_clock_gating,  	.disable_clock_gating = goya_disable_clock_gating,  	.debug_coresight = goya_debug_coresight,  	.is_device_idle = goya_is_device_idle, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1ecdcf8b763a..194d83352696 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -578,8 +578,9 @@ enum hl_pll_frequency {   * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with   *                              ASID-VA-size mask.   * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. 
+ * @set_clock_gating: enable/disable clock gating per engine according to + *                    clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely   * @debug_coresight: perform certain actions on Coresight for debugging.   * @is_device_idle: return true if device is idle, false otherwise.   * @soft_reset_late_init: perform certain actions needed after soft reset. @@ -587,7 +588,11 @@ enum hl_pll_frequency {   * @hw_queues_unlock: release H/W queues lock.   * @get_pci_id: retrieve PCI ID.   * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. + * @send_cpu_message: send message to F/W. If the message is timedout, the + *                    driver will eventually reset the device. The timeout can + *                    be determined by the calling function or it can be 0 and + *                    then the timeout is the default timeout for the specific + *                    ASIC   * @get_hw_state: retrieve the H/W state   * @pci_bars_map: Map PCI BARs.   * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns @@ -680,7 +685,7 @@ struct hl_asic_funcs {  	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,  			u32 asid, u64 va, u64 size);  	int (*send_heartbeat)(struct hl_device *hdev); -	void (*enable_clock_gating)(struct hl_device *hdev); +	void (*set_clock_gating)(struct hl_device *hdev);  	void (*disable_clock_gating)(struct hl_device *hdev);  	int (*debug_coresight)(struct hl_device *hdev, void *data);  	bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, @@ -1398,6 +1403,9 @@ struct hl_device_idle_busy_ts {   * @max_power: the max power of the device, as configured by the sysadmin. This   *             value is saved so in case of hard-reset, the driver will restore   *             this value and update the F/W after the re-initialization + * @clock_gating_mask: is clock gating enabled. 
bitmask that represents the + *                     different engines. See debugfs-driver-habanalabs for + *                     details.   * @in_reset: is device in reset flow.   * @curr_pll_profile: current PLL profile.   * @cs_active_cnt: number of active command submissions on this device (active @@ -1425,7 +1433,6 @@ struct hl_device_idle_busy_ts {   * @init_done: is the initialization of the device done.   * @mmu_enable: is MMU enabled.   * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled.   * @device_cpu_disabled: is the device CPU disabled (due to timeouts)   * @dma_mask: the dma mask that was set for this device   * @in_debug: is device under debug. This, together with fpriv_list, enforces @@ -1493,6 +1500,7 @@ struct hl_device {  	atomic64_t			dram_used_mem;  	u64				timeout_jiffies;  	u64				max_power; +	u64				clock_gating_mask;  	atomic_t			in_reset;  	enum hl_pll_frequency		curr_pll_profile;  	int				cs_active_cnt; @@ -1514,7 +1522,6 @@ struct hl_device {  	u8				dram_default_page_mapping;  	u8				pmmu_huge_range;  	u8				init_done; -	u8				clock_gating;  	u8				device_cpu_disabled;  	u8				dma_mask;  	u8				in_debug; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8652c7e5d7f1..22716da9f85f 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)  	hdev->fw_loading = 1;  	hdev->cpu_queues_enable = 1;  	hdev->heartbeat = 1; -	hdev->clock_gating = 1; +	hdev->clock_gating_mask = ULONG_MAX;  	hdev->reset_pcilink = 0;  	hdev->axi_drain = 0; diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 8c6cd77e6af6..b997336fa75f 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -10,7 +10,6 @@  #include <linux/pci.h>  #include <linux/hwmon.h> -#define 
SENSORS_PKT_TIMEOUT		1000000	/* 1s */  #define HWMON_NR_SENSOR_TYPES		(hwmon_pwm + 1)  int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev,  	pkt.type = __cpu_to_le16(attr);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -			SENSORS_PKT_TIMEOUT, value); +						0, value);  	if (rc) {  		dev_err(hdev->dev, @@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev,  	pkt.value = __cpu_to_le64(value);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						SENSORS_PKT_TIMEOUT, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, @@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev,  	pkt.type = __cpu_to_le16(attr);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SENSORS_PKT_TIMEOUT, value); +						0, value);  	if (rc) {  		dev_err(hdev->dev, @@ -400,7 +399,7 @@ int hl_get_current(struct hl_device *hdev,  	pkt.type = __cpu_to_le16(attr);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SENSORS_PKT_TIMEOUT, value); +						0, value);  	if (rc) {  		dev_err(hdev->dev, @@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev,  	pkt.type = __cpu_to_le16(attr);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SENSORS_PKT_TIMEOUT, value); +						0, value);  	if (rc) {  		dev_err(hdev->dev, @@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev,  	pkt.type = __cpu_to_le16(attr);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SENSORS_PKT_TIMEOUT, value); +						0, value);  	if (rc) {  		dev_err(hdev->dev, @@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,  	pkt.value = cpu_to_le64(value);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SENSORS_PKT_TIMEOUT, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, @@ -502,7 
+501,7 @@ int hl_set_voltage(struct hl_device *hdev,  	pkt.value = __cpu_to_le64(value);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						SENSORS_PKT_TIMEOUT, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, @@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev,  	pkt.value = __cpu_to_le64(value);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						SENSORS_PKT_TIMEOUT, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h index 9a5800b0086b..0f0cd067bb43 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h @@ -197,6 +197,9 @@ struct packet_wait {  	__le32 ctl;  }; +#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_SHIFT	0 +#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK	0x00000001 +  struct packet_load_and_exe {  	__le32 cfg;  	__le32 ctl; diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 5d78d5e1c782..70b6b1863c2e 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -9,9 +9,6 @@  #include <linux/pci.h> -#define SET_CLK_PKT_TIMEOUT	1000000	/* 1s */ -#define SET_PWR_PKT_TIMEOUT	1000000	/* 1s */ -  long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)  {  	struct armcp_packet pkt; @@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)  	pkt.pll_index = cpu_to_le32(pll_index);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						SET_CLK_PKT_TIMEOUT, &result); +						0, &result);  	if (rc) {  		dev_err(hdev->dev, @@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)  	pkt.value = cpu_to_le64(freq);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SET_CLK_PKT_TIMEOUT, NULL); +						0, NULL);  	if 
(rc)  		dev_err(hdev->dev, @@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev)  				ARMCP_PKT_CTL_OPCODE_SHIFT);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -						SET_PWR_PKT_TIMEOUT, &result); +						0, &result);  	if (rc) {  		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); @@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)  	pkt.value = cpu_to_le64(value);  	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), -					SET_PWR_PKT_TIMEOUT, NULL); +						0, NULL);  	if (rc)  		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index bccd341e9ae1..d5d2af4d10e6 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -828,7 +828,7 @@ static void run_plant_and_detach_test(int is_early)  	char before[BREAK_INSTR_SIZE];  	char after[BREAK_INSTR_SIZE]; -	probe_kernel_read(before, (char *)kgdbts_break_test, +	copy_from_kernel_nofault(before, (char *)kgdbts_break_test,  	  BREAK_INSTR_SIZE);  	init_simple_test();  	ts.tst = plant_and_detach_test; @@ -836,8 +836,8 @@ static void run_plant_and_detach_test(int is_early)  	/* Activate test with initial breakpoint */  	if (!is_early)  		kgdb_breakpoint(); -	probe_kernel_read(after, (char *)kgdbts_break_test, -	  BREAK_INSTR_SIZE); +	copy_from_kernel_nofault(after, (char *)kgdbts_break_test, +			BREAK_INSTR_SIZE);  	if (memcmp(before, after, BREAK_INSTR_SIZE)) {  		printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");  		panic("kgdb memory corruption"); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 8d468e0a950a..f476dbc7252b 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -745,9 +745,8 @@ static int mei_cl_device_remove(struct device *dev)  	mei_cl_bus_module_put(cldev);  	module_put(THIS_MODULE); -	dev->driver = NULL; -	return ret; +	return ret;  }  static ssize_t name_show(struct device *dev, struct 
device_attribute *a, diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 9392934e3a06..7becfc768bbc 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -94,6 +94,7 @@  #define MEI_DEV_ID_JSP_N      0x4DE0  /* Jasper Lake Point N */  #define MEI_DEV_ID_TGP_LP     0xA0E0  /* Tiger Lake Point LP */ +#define MEI_DEV_ID_TGP_H      0x43E0  /* Tiger Lake Point H */  #define MEI_DEV_ID_MCC        0x4B70  /* Mule Creek Canyon (EHL) */  #define MEI_DEV_ID_MCC_4      0x4B75  /* Mule Creek Canyon 4 (EHL) */ @@ -107,6 +108,8 @@  #  define PCI_CFG_HFS_1_D0I3_MSK     0x80000000  #define PCI_CFG_HFS_2         0x48  #define PCI_CFG_HFS_3         0x60 +#  define PCI_CFG_HFS_3_FW_SKU_MSK   0x00000070 +#  define PCI_CFG_HFS_3_FW_SKU_SPS   0x00000060  #define PCI_CFG_HFS_4         0x64  #define PCI_CFG_HFS_5         0x68  #define PCI_CFG_HFS_6         0x6C diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index f620442addf5..7649710a2ab9 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1366,7 +1366,7 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev)  #define MEI_CFG_FW_NM                           \  	.quirk_probe = mei_me_fw_type_nm -static bool mei_me_fw_type_sps(struct pci_dev *pdev) +static bool mei_me_fw_type_sps_4(struct pci_dev *pdev)  {  	u32 reg;  	unsigned int devfn; @@ -1382,7 +1382,36 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev)  	return (reg & 0xf0000) == 0xf0000;  } -#define MEI_CFG_FW_SPS                           \ +#define MEI_CFG_FW_SPS_4                          \ +	.quirk_probe = mei_me_fw_type_sps_4 + +/** + * mei_me_fw_sku_sps() - check for sps sku + * + * Read ME FW Status register to check for SPS Firmware. 
+ * The SPS FW is only signaled in pci function 0 + * + * @pdev: pci device + * + * Return: true in case of SPS firmware + */ +static bool mei_me_fw_type_sps(struct pci_dev *pdev) +{ +	u32 reg; +	u32 fw_type; +	unsigned int devfn; + +	devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); +	pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_3, &reg); +	trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_3", PCI_CFG_HFS_3, reg); +	fw_type = (reg & PCI_CFG_HFS_3_FW_SKU_MSK); + +	dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); + +	return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; +} + +#define MEI_CFG_FW_SPS                          \ +	.quirk_probe = mei_me_fw_type_sps  #define MEI_CFG_FW_VER_SUPP                     \ @@ -1452,10 +1481,17 @@ static const struct mei_cfg mei_me_pch8_cfg = {  };  /* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ -static const struct mei_cfg mei_me_pch8_sps_cfg = { +static const struct mei_cfg mei_me_pch8_sps_4_cfg = {  	MEI_CFG_PCH8_HFS,  	MEI_CFG_FW_VER_SUPP, -	MEI_CFG_FW_SPS, +	MEI_CFG_FW_SPS_4, +}; + +/* LBG with quirk for SPS (4.0) Firmware exclusion */ +static const struct mei_cfg mei_me_pch12_sps_4_cfg = { +	MEI_CFG_PCH8_HFS, +	MEI_CFG_FW_VER_SUPP, +	MEI_CFG_FW_SPS_4,  };  /* Cannon Lake and newer devices */ @@ -1465,10 +1501,20 @@ static const struct mei_cfg mei_me_pch12_cfg = {  	MEI_CFG_DMA_128,  }; -/* LBG with quirk for SPS Firmware exclusion */ +/* Cannon Lake with quirk for SPS 5.0 and newer Firmware exclusion */  static const struct mei_cfg mei_me_pch12_sps_cfg = {  	MEI_CFG_PCH8_HFS,  	MEI_CFG_FW_VER_SUPP, +	MEI_CFG_DMA_128, +	MEI_CFG_FW_SPS, +}; + +/* Cannon Lake with quirk for SPS 5.0 and newer Firmware exclusion + * w/o DMA support + */ +static const struct mei_cfg mei_me_pch12_nodma_sps_cfg = { +	MEI_CFG_PCH8_HFS, +	MEI_CFG_FW_VER_SUPP,  	MEI_CFG_FW_SPS,  }; @@ -1480,6 +1526,15 @@ static const struct mei_cfg mei_me_pch15_cfg = {  	MEI_CFG_TRC,  }; +/* Tiger Lake with quirk for SPS 5.0 and newer Firmware exclusion */ +static 
const struct mei_cfg mei_me_pch15_sps_cfg = { +	MEI_CFG_PCH8_HFS, +	MEI_CFG_FW_VER_SUPP, +	MEI_CFG_DMA_128, +	MEI_CFG_TRC, +	MEI_CFG_FW_SPS, +}; +  /*   * mei_cfg_list - A list of platform platform specific configurations.   * Note: has to be synchronized with  enum mei_cfg_idx. @@ -1492,10 +1547,13 @@ static const struct mei_cfg *const mei_cfg_list[] = {  	[MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg,  	[MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg,  	[MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, -	[MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, +	[MEI_ME_PCH8_SPS_4_CFG] = &mei_me_pch8_sps_4_cfg,  	[MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, +	[MEI_ME_PCH12_SPS_4_CFG] = &mei_me_pch12_sps_4_cfg,  	[MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg, +	[MEI_ME_PCH12_SPS_NODMA_CFG] = &mei_me_pch12_nodma_sps_cfg,  	[MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, +	[MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg,  };  const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index b6b94e211464..6a8973649c49 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -1,6 +1,6 @@  /* SPDX-License-Identifier: GPL-2.0 */  /* - * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2020, Intel Corporation. All rights reserved.   * Intel Management Engine Interface (Intel MEI) Linux driver   */ @@ -76,14 +76,20 @@ struct mei_me_hw {   *                         with quirk for Node Manager exclusion.   * @MEI_ME_PCH8_CFG:       Platform Controller Hub Gen8 and newer   *                         client platforms. - * @MEI_ME_PCH8_SPS_CFG:   Platform Controller Hub Gen8 and newer + * @MEI_ME_PCH8_SPS_4_CFG: Platform Controller Hub Gen8 and newer   *                         servers platforms with quirk for   *                         SPS firmware exclusion.   
* @MEI_ME_PCH12_CFG:      Platform Controller Hub Gen12 and newer - * @MEI_ME_PCH12_SPS_CFG:  Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH12_SPS_4_CFG:Platform Controller Hub Gen12 up to 4.0 + *                         servers platforms with quirk for + *                         SPS firmware exclusion. + * @MEI_ME_PCH12_SPS_CFG:  Platform Controller Hub Gen12 5.0 and newer   *                         servers platforms with quirk for   *                         SPS firmware exclusion.   * @MEI_ME_PCH15_CFG:      Platform Controller Hub Gen15 and newer + * @MEI_ME_PCH15_SPS_CFG:  Platform Controller Hub Gen15 and newer + *                         servers platforms with quirk for + *                         SPS firmware exclusion.   * @MEI_ME_NUM_CFG:        Upper Sentinel.   */  enum mei_cfg_idx { @@ -94,10 +100,13 @@ enum mei_cfg_idx {  	MEI_ME_PCH7_CFG,  	MEI_ME_PCH_CPT_PBG_CFG,  	MEI_ME_PCH8_CFG, -	MEI_ME_PCH8_SPS_CFG, +	MEI_ME_PCH8_SPS_4_CFG,  	MEI_ME_PCH12_CFG, +	MEI_ME_PCH12_SPS_4_CFG,  	MEI_ME_PCH12_SPS_CFG, +	MEI_ME_PCH12_SPS_NODMA_CFG,  	MEI_ME_PCH15_CFG, +	MEI_ME_PCH15_SPS_CFG,  	MEI_ME_NUM_CFG,  }; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 71f795b510ce..2a3f2fd5df50 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -59,18 +59,18 @@ static const struct pci_device_id mei_me_pci_tbl[] = {  	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_4_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_4_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_4_CFG)},  	
{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_4_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_4_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_4_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, @@ -84,8 +84,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {  	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH12_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, -	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH8_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_SPS_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH12_SPS_NODMA_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, @@ -96,6 +96,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {  	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, +	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_H, MEI_ME_PCH15_SPS_CFG)},  	{MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, |