Diffstat (limited to 'drivers/cxl/core')
 drivers/cxl/core/core.h   |  11
 drivers/cxl/core/hdm.c    | 178
 drivers/cxl/core/mbox.c   | 163
 drivers/cxl/core/memdev.c | 227
 drivers/cxl/core/pci.c    | 258
 drivers/cxl/core/pmem.c   |   6
 drivers/cxl/core/port.c   |  48
 drivers/cxl/core/region.c | 157
 drivers/cxl/core/trace.c  |  94
 drivers/cxl/core/trace.h  | 103
 10 files changed, 1023 insertions(+), 222 deletions(-)
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index cde475e13216..27f0968449de 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -25,7 +25,12 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);  #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)  int cxl_region_init(void);  void cxl_region_exit(void); +int cxl_get_poison_by_endpoint(struct cxl_port *port);  #else +static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) +{ +	return 0; +}  static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)  {  } @@ -64,4 +69,10 @@ int cxl_memdev_init(void);  void cxl_memdev_exit(void);  void cxl_mbox_init(void); +enum cxl_poison_trace_type { +	CXL_POISON_TRACE_LIST, +	CXL_POISON_TRACE_INJECT, +	CXL_POISON_TRACE_CLEAR, +}; +  #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 45deda18ed32..7889ff203a34 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -1,6 +1,5 @@  // SPDX-License-Identifier: GPL-2.0-only  /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-hi-lo.h>  #include <linux/seq_file.h>  #include <linux/device.h>  #include <linux/delay.h> @@ -93,33 +92,57 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,  	cxl_probe_component_regs(&port->dev, crb, &map.component_map);  	if (!map.component_map.hdm_decoder.valid) { -		dev_err(&port->dev, "HDM decoder registers invalid\n"); -		return -ENXIO; +		dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); +		/* unique error code to indicate no HDM decoder capability */ +		return -ENODEV;  	}  	return cxl_map_component_regs(&port->dev, regs, &map,  				      BIT(CXL_CM_CAP_CAP_ID_HDM));  } -static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port, -						   struct cxl_endpoint_dvsec_info *info) +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)  { -	struct device *dev = &port->dev;  	struct cxl_hdm *cxlhdm; +	void __iomem *hdm; +	u32 ctrl; +	int i; -	if (!info->mem_enabled) -		return ERR_PTR(-ENODEV); +	if (!info) +		return false; -	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); -	if (!cxlhdm) -		return ERR_PTR(-ENOMEM); +	cxlhdm = dev_get_drvdata(&info->port->dev); +	hdm = cxlhdm->regs.hdm_decoder; -	cxlhdm->port = port; -	cxlhdm->decoder_count = info->ranges; -	cxlhdm->target_count = info->ranges; -	dev_set_drvdata(&port->dev, cxlhdm); +	if (!hdm) +		return true; -	return cxlhdm; +	/* +	 * If HDM decoders are present and the driver is in control of +	 * Mem_Enable skip DVSEC based emulation +	 */ +	if (!info->mem_enabled) +		return false; + +	/* +	 * If any decoders are committed already, there should not be any +	 * emulated DVSEC decoders. 
+	 */ +	for (i = 0; i < cxlhdm->decoder_count; i++) { +		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); +		dev_dbg(&info->port->dev, +			"decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n", +			info->port->id, i, +			FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl), +			readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)), +			readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)), +			readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)), +			readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i))); +		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) +			return false; +	} + +	return true;  }  /** @@ -138,13 +161,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,  	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);  	if (!cxlhdm)  		return ERR_PTR(-ENOMEM); -  	cxlhdm->port = port; -	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); -	if (!crb) { -		if (info && info->mem_enabled) -			return devm_cxl_setup_emulated_hdm(port, info); +	dev_set_drvdata(dev, cxlhdm); +	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); +	if (!crb && info && info->mem_enabled) { +		cxlhdm->decoder_count = info->ranges; +		return cxlhdm; +	} else if (!crb) {  		dev_err(dev, "No component registers mapped\n");  		return ERR_PTR(-ENXIO);  	} @@ -160,7 +184,15 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,  		return ERR_PTR(-ENXIO);  	} -	dev_set_drvdata(dev, cxlhdm); +	/* +	 * Now that the hdm capability is parsed, decide if range +	 * register emulation is needed and fixup cxlhdm accordingly. +	 */ +	if (should_emulate_decoders(info)) { +		dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, +			info->ranges > 1 ? "s" : ""); +		cxlhdm->decoder_count = info->ranges; +	}  	return cxlhdm;  } @@ -245,8 +277,11 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,  	lockdep_assert_held_write(&cxl_dpa_rwsem); -	if (!len) -		goto success; +	if (!len) { +		dev_warn(dev, "decoder%d.%d: empty reservation attempted\n", +			 port->id, cxled->cxld.id); +		return -EINVAL; +	}  	if (cxled->dpa_res) {  		dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n", @@ -299,7 +334,6 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,  		cxled->mode = CXL_DECODER_MIXED;  	} -success:  	port->hdm_end++;  	get_device(&cxled->cxld.dev);  	return 0; @@ -714,14 +748,20 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)  	return 0;  } -static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, -					    struct cxl_decoder *cxld, int which, -					    struct cxl_endpoint_dvsec_info *info) +static int cxl_setup_hdm_decoder_from_dvsec( +	struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base, +	int which, struct cxl_endpoint_dvsec_info *info)  { +	struct cxl_endpoint_decoder *cxled; +	u64 len; +	int rc; +  	if (!is_cxl_endpoint(port))  		return -EOPNOTSUPP; -	if (!range_len(&info->dvsec_range[which])) +	cxled = to_cxl_endpoint_decoder(&cxld->dev); +	len = range_len(&info->dvsec_range[which]); +	if (!len)  		return -ENOENT;  	cxld->target_type = CXL_DECODER_EXPANDER; @@ -736,41 +776,25 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,  	cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;  	port->commit_end = cxld->id; -	return 0; -} - -static bool should_emulate_decoders(struct cxl_port *port) -{ -	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); -	void __iomem *hdm = cxlhdm->regs.hdm_decoder; -	u32 ctrl; -	int i; - -	if (!is_cxl_endpoint(cxlhdm->port)) -		return false; 
- -	if (!hdm) -		return true; - -	/* -	 * If any decoders are committed already, there should not be any -	 * emulated DVSEC decoders. -	 */ -	for (i = 0; i < cxlhdm->decoder_count; i++) { -		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); -		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) -			return false; +	rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0); +	if (rc) { +		dev_err(&port->dev, +			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", +			port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc); +		return rc;  	} +	*dpa_base += len; +	cxled->state = CXL_DECODER_STATE_AUTO; -	return true; +	return 0;  }  static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  			    int *target_map, void __iomem *hdm, int which,  			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)  { -	struct cxl_endpoint_decoder *cxled = NULL; -	u64 size, base, skip, dpa_size; +	u64 size, base, skip, dpa_size, lo, hi; +	struct cxl_endpoint_decoder *cxled;  	bool committed;  	u32 remainder;  	int i, rc; @@ -780,15 +804,17 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  		unsigned char target_id[8];  	} target_list; -	if (should_emulate_decoders(port)) -		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); - -	if (is_endpoint_decoder(&cxld->dev)) -		cxled = to_cxl_endpoint_decoder(&cxld->dev); +	if (should_emulate_decoders(info)) +		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base, +							which, info);  	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); -	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); -	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); +	lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); +	hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which)); +	base = (hi << 32) + lo; +	lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); +	hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which)); +	size = (hi << 32) + lo;  	committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);  	cxld->commit = cxl_decoder_commit;  	cxld->reset = cxl_decoder_reset; @@ -806,9 +832,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  		.end = base + size - 1,  	}; -	if (cxled && !committed && range_len(&info->dvsec_range[which])) -		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); -  	/* decoders are enabled if committed */  	if (committed) {  		cxld->flags |= CXL_DECODER_F_ENABLE; @@ -824,6 +847,13 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  				 port->id, cxld->id);  			return -ENXIO;  		} + +		if (size == 0) { +			dev_warn(&port->dev, +				 "decoder%d.%d: Committed with zero size\n", +				 port->id, cxld->id); +			return -ENXIO; +		}  		port->commit_end = cxld->id;  	} else {  		/* unless / until type-2 drivers arrive, assume type-3 */ @@ -846,9 +876,14 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  	if (rc)  		return rc; -	if (!cxled) { -		target_list.value = -			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); +	dev_dbg(&port->dev, "decoder%d.%d: range: %#llx-%#llx iw: %d ig: %d\n", +		port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end, +		cxld->interleave_ways, cxld->interleave_granularity); + +	if (!info) { +		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which)); +		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); +		target_list.value = (hi << 32) + lo;  		for (i = 0; i < cxld->interleave_ways; i++)  			
target_map[i] = target_list.target_id[i]; @@ -865,7 +900,10 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,  			port->id, cxld->id, size, cxld->interleave_ways);  		return -ENXIO;  	} -	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); +	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); +	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which)); +	skip = (hi << 32) + lo; +	cxled = to_cxl_endpoint_decoder(&cxld->dev);  	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);  	if (rc) {  		dev_err(&port->dev, diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index f2addb457172..bea9cf31a12d 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1,10 +1,11 @@  // SPDX-License-Identifier: GPL-2.0-only  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-lo-hi.h>  #include <linux/security.h>  #include <linux/debugfs.h>  #include <linux/ktime.h>  #include <linux/mutex.h> +#include <asm/unaligned.h> +#include <cxlpci.h>  #include <cxlmem.h>  #include <cxl.h> @@ -61,12 +62,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {  	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),  	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),  	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), -	CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), -	CXL_CMD(INJECT_POISON, 0x8, 0, 0), -	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),  	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), -	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), -	CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0),  };  /* @@ -87,6 +83,9 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {   *   * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that   * is kept up to date with patrol notifications and error management. + * + * CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel + * driver orchestration for safety.   
*/  static u16 cxl_disabled_raw_commands[] = {  	CXL_MBOX_OP_ACTIVATE_FW, @@ -95,6 +94,9 @@ static u16 cxl_disabled_raw_commands[] = {  	CXL_MBOX_OP_SET_SHUTDOWN_STATE,  	CXL_MBOX_OP_SCAN_MEDIA,  	CXL_MBOX_OP_GET_SCAN_MEDIA, +	CXL_MBOX_OP_GET_POISON, +	CXL_MBOX_OP_INJECT_POISON, +	CXL_MBOX_OP_CLEAR_POISON,  };  /* @@ -119,6 +121,43 @@ static bool cxl_is_security_command(u16 opcode)  	return false;  } +static bool cxl_is_poison_command(u16 opcode) +{ +#define CXL_MBOX_OP_POISON_CMDS 0x43 + +	if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS) +		return true; + +	return false; +} + +static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison, +				       u16 opcode) +{ +	switch (opcode) { +	case CXL_MBOX_OP_GET_POISON: +		set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds); +		break; +	case CXL_MBOX_OP_INJECT_POISON: +		set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds); +		break; +	case CXL_MBOX_OP_CLEAR_POISON: +		set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds); +		break; +	case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS: +		set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds); +		break; +	case CXL_MBOX_OP_SCAN_MEDIA: +		set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds); +		break; +	case CXL_MBOX_OP_GET_SCAN_MEDIA: +		set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds); +		break; +	default: +		break; +	} +} +  static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)  {  	struct cxl_mem_command *c; @@ -634,13 +673,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)  		u16 opcode = le16_to_cpu(cel_entry[i].opcode);  		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); -		if (!cmd) { +		if (!cmd && !cxl_is_poison_command(opcode)) {  			dev_dbg(cxlds->dev,  				"Opcode 0x%04x unsupported by driver\n", opcode);  			continue;  		} -		set_bit(cmd->info.id, cxlds->enabled_cmds); +		if (cmd) +			set_bit(cmd->info.id, cxlds->enabled_cmds); + +		if (cxl_is_poison_command(opcode)) +			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode); +  		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);  	}  } @@ -984,7 +1028,7 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)   * cxl_dev_state_identify() - Send the IDENTIFY command to the device.   * @cxlds: The device data for the operation   * - * Return: 0 if identify was executed successfully. + * Return: 0 if identify was executed successfully or media not ready.   *   * This will dispatch the identify command to the device and on success populate   * structures to be exported to sysfs. 
@@ -994,8 +1038,12 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)  	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */  	struct cxl_mbox_identify id;  	struct cxl_mbox_cmd mbox_cmd; +	u32 val;  	int rc; +	if (!cxlds->media_ready) +		return 0; +  	mbox_cmd = (struct cxl_mbox_cmd) {  		.opcode = CXL_MBOX_OP_IDENTIFY,  		.size_out = sizeof(id), @@ -1017,6 +1065,11 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)  	cxlds->lsa_size = le32_to_cpu(id.lsa_size);  	memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); +	if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) { +		val = get_unaligned_le24(id.poison_list_max_mer); +		cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX); +	} +  	return 0;  }  EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); @@ -1052,6 +1105,13 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)  	struct device *dev = cxlds->dev;  	int rc; +	if (!cxlds->media_ready) { +		cxlds->dpa_res = DEFINE_RES_MEM(0, 0); +		cxlds->ram_res = DEFINE_RES_MEM(0, 0); +		cxlds->pmem_res = DEFINE_RES_MEM(0, 0); +		return 0; +	} +  	cxlds->dpa_res =  		(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes); @@ -1107,6 +1167,91 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)  }  EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL); +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, +		       struct cxl_region *cxlr) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; +	struct cxl_mbox_poison_out *po; +	struct cxl_mbox_poison_in pi; +	struct cxl_mbox_cmd mbox_cmd; +	int nr_records = 0; +	int rc; + +	rc = mutex_lock_interruptible(&cxlds->poison.lock); +	if (rc) +		return rc; + +	po = cxlds->poison.list_out; +	pi.offset = cpu_to_le64(offset); +	pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT); + +	mbox_cmd = (struct cxl_mbox_cmd) { +		.opcode = CXL_MBOX_OP_GET_POISON, +		.size_in = sizeof(pi), +		.payload_in = &pi, +		.size_out = cxlds->payload_size, +		.payload_out = po, +		.min_out = struct_size(po, record, 0), +	}; + +	do { +		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); +		if (rc) +			break; + +		for (int i = 0; i < le16_to_cpu(po->count); i++) +			trace_cxl_poison(cxlmd, cxlr, &po->record[i], +					 po->flags, po->overflow_ts, +					 CXL_POISON_TRACE_LIST); + +		/* Protect against an uncleared _FLAG_MORE */ +		nr_records = nr_records + le16_to_cpu(po->count); +		if (nr_records >= cxlds->poison.max_errors) { +			dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", +				nr_records); +			break; +		} +	} while (po->flags & CXL_POISON_FLAG_MORE); + +	mutex_unlock(&cxlds->poison.lock); +	return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); + +static void free_poison_buf(void *buf) +{ +	kvfree(buf); +} + +/* Get Poison List output buffer is protected by cxlds->poison.lock */ +static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds) +{ +	cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL); +	if (!cxlds->poison.list_out) +		return -ENOMEM; + +	return devm_add_action_or_reset(cxlds->dev, free_poison_buf, +					cxlds->poison.list_out); +} + +int cxl_poison_state_init(struct cxl_dev_state *cxlds) +{ +	int rc; + +	if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) +		return 0; + +	rc = cxl_poison_alloc_buf(cxlds); +	if (rc) { +		clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds); +		return rc; +	} + +	mutex_init(&cxlds->poison.lock); +	return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL); +  struct cxl_dev_state 
*cxl_dev_state_create(struct device *dev)  {  	struct cxl_dev_state *cxlds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 28a05f2fe32d..057a43267290 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -6,6 +6,7 @@  #include <linux/idr.h>  #include <linux/pci.h>  #include <cxlmem.h> +#include "trace.h"  #include "core.h"  static DECLARE_RWSEM(cxl_memdev_rwsem); @@ -106,6 +107,232 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,  }  static DEVICE_ATTR_RO(numa_node); +static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; +	u64 offset, length; +	int rc = 0; + +	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ +	if (resource_size(&cxlds->pmem_res)) { +		offset = cxlds->pmem_res.start; +		length = resource_size(&cxlds->pmem_res); +		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); +		if (rc) +			return rc; +	} +	if (resource_size(&cxlds->ram_res)) { +		offset = cxlds->ram_res.start; +		length = resource_size(&cxlds->ram_res); +		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); +		/* +		 * Invalid Physical Address is not an error for +		 * volatile addresses. Device support is optional. +		 */ +		if (rc == -EFAULT) +			rc = 0; +	} +	return rc; +} + +int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) +{ +	struct cxl_port *port; +	int rc; + +	port = dev_get_drvdata(&cxlmd->dev); +	if (!port || !is_cxl_endpoint(port)) +		return -EINVAL; + +	rc = down_read_interruptible(&cxl_dpa_rwsem); +	if (rc) +		return rc; + +	if (port->commit_end == -1) { +		/* No regions mapped to this memdev */ +		rc = cxl_get_poison_by_memdev(cxlmd); +	} else { +		/* Regions mapped, collect poison by endpoint */ +		rc =  cxl_get_poison_by_endpoint(port); +	} +	up_read(&cxl_dpa_rwsem); + +	return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL); + +struct cxl_dpa_to_region_context { +	struct cxl_region *cxlr; +	u64 dpa; +}; + +static int __cxl_dpa_to_region(struct device *dev, void *arg) +{ +	struct cxl_dpa_to_region_context *ctx = arg; +	struct cxl_endpoint_decoder *cxled; +	u64 dpa = ctx->dpa; + +	if (!is_endpoint_decoder(dev)) +		return 0; + +	cxled = to_cxl_endpoint_decoder(dev); +	if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) +		return 0; + +	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) +		return 0; + +	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, +		dev_name(&cxled->cxld.region->dev)); + +	ctx->cxlr = cxled->cxld.region; + +	return 1; +} + +static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) +{ +	struct cxl_dpa_to_region_context ctx; +	struct cxl_port *port; + +	ctx = (struct cxl_dpa_to_region_context) { +		.dpa = dpa, +	}; +	port = dev_get_drvdata(&cxlmd->dev); +	if (port && is_cxl_endpoint(port) && port->commit_end != -1) +		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); + +	return ctx.cxlr; +} + +static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; + +	if (!IS_ENABLED(CONFIG_DEBUG_FS)) +		return 0; + +	if (!resource_size(&cxlds->dpa_res)) { +		dev_dbg(cxlds->dev, "device has no dpa resource\n"); +		return -EINVAL; +	} +	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) { +		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n", +			dpa, &cxlds->dpa_res); +		return -EINVAL; +	} +	if (!IS_ALIGNED(dpa, 64)) { +		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa); +		return 
-EINVAL; +	} + +	return 0; +} + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; +	struct cxl_mbox_inject_poison inject; +	struct cxl_poison_record record; +	struct cxl_mbox_cmd mbox_cmd; +	struct cxl_region *cxlr; +	int rc; + +	if (!IS_ENABLED(CONFIG_DEBUG_FS)) +		return 0; + +	rc = down_read_interruptible(&cxl_dpa_rwsem); +	if (rc) +		return rc; + +	rc = cxl_validate_poison_dpa(cxlmd, dpa); +	if (rc) +		goto out; + +	inject.address = cpu_to_le64(dpa); +	mbox_cmd = (struct cxl_mbox_cmd) { +		.opcode = CXL_MBOX_OP_INJECT_POISON, +		.size_in = sizeof(inject), +		.payload_in = &inject, +	}; +	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); +	if (rc) +		goto out; + +	cxlr = cxl_dpa_to_region(cxlmd, dpa); +	if (cxlr) +		dev_warn_once(cxlds->dev, +			      "poison inject dpa:%#llx region: %s\n", dpa, +			      dev_name(&cxlr->dev)); + +	record = (struct cxl_poison_record) { +		.address = cpu_to_le64(dpa), +		.length = cpu_to_le32(1), +	}; +	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); +out: +	up_read(&cxl_dpa_rwsem); + +	return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL); + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; +	struct cxl_mbox_clear_poison clear; +	struct cxl_poison_record record; +	struct cxl_mbox_cmd mbox_cmd; +	struct cxl_region *cxlr; +	int rc; + +	if (!IS_ENABLED(CONFIG_DEBUG_FS)) +		return 0; + +	rc = down_read_interruptible(&cxl_dpa_rwsem); +	if (rc) +		return rc; + +	rc = cxl_validate_poison_dpa(cxlmd, dpa); +	if (rc) +		goto out; + +	/* +	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command +	 * is defined to accept 64 bytes of write-data, along with the +	 * address to clear. This driver uses zeroes as write-data. +	 */ +	clear = (struct cxl_mbox_clear_poison) { +		.address = cpu_to_le64(dpa) +	}; + +	mbox_cmd = (struct cxl_mbox_cmd) { +		.opcode = CXL_MBOX_OP_CLEAR_POISON, +		.size_in = sizeof(clear), +		.payload_in = &clear, +	}; + +	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); +	if (rc) +		goto out; + +	cxlr = cxl_dpa_to_region(cxlmd, dpa); +	if (cxlr) +		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n", +			      dpa, dev_name(&cxlr->dev)); + +	record = (struct cxl_poison_record) { +		.address = cpu_to_le64(dpa), +		.length = cpu_to_le32(1), +	}; +	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); +out: +	up_read(&cxl_dpa_rwsem); + +	return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL); +  static struct attribute *cxl_memdev_attributes[] = {  	&dev_attr_serial.attr,  	&dev_attr_firmware_version.attr, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7328a2552411..67f4ab6daa34 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -101,23 +101,57 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port)  }  EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); -/* - * Wait up to @media_ready_timeout for the device to report memory - * active. 
- */ -int cxl_await_media_ready(struct cxl_dev_state *cxlds) +static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) +{ +	struct pci_dev *pdev = to_pci_dev(cxlds->dev); +	int d = cxlds->cxl_dvsec; +	bool valid = false; +	int rc, i; +	u32 temp; + +	if (id > CXL_DVSEC_RANGE_MAX) +		return -EINVAL; + +	/* Check MEM INFO VALID bit first, give up after 1s */ +	i = 1; +	do { +		rc = pci_read_config_dword(pdev, +					   d + CXL_DVSEC_RANGE_SIZE_LOW(id), +					   &temp); +		if (rc) +			return rc; + +		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); +		if (valid) +			break; +		msleep(1000); +	} while (i--); + +	if (!valid) { +		dev_err(&pdev->dev, +			"Timeout awaiting memory range %d valid after 1s.\n", +			id); +		return -ETIMEDOUT; +	} + +	return 0; +} + +static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)  {  	struct pci_dev *pdev = to_pci_dev(cxlds->dev);  	int d = cxlds->cxl_dvsec;  	bool active = false; -	u64 md_status;  	int rc, i; +	u32 temp; -	for (i = media_ready_timeout; i; i--) { -		u32 temp; +	if (id > CXL_DVSEC_RANGE_MAX) +		return -EINVAL; +	/* Check MEM ACTIVE bit, up to 60s timeout by default */ +	for (i = media_ready_timeout; i; i--) {  		rc = pci_read_config_dword( -			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); +			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);  		if (rc)  			return rc; @@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)  		return -ETIMEDOUT;  	} +	return 0; +} + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. + */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ +	struct pci_dev *pdev = to_pci_dev(cxlds->dev); +	int d = cxlds->cxl_dvsec; +	int rc, i, hdm_count; +	u64 md_status; +	u16 cap; + +	rc = pci_read_config_word(pdev, +				  d + CXL_DVSEC_CAP_OFFSET, &cap); +	if (rc) +		return rc; + +	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); +	for (i = 0; i < hdm_count; i++) { +		rc = cxl_dvsec_mem_range_valid(cxlds, i); +		if (rc) +			return rc; +	} + +	for (i = 0; i < hdm_count; i++) { +		rc = cxl_dvsec_mem_range_active(cxlds, i); +		if (rc) +			return rc; +	} +  	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);  	if (!CXLMDEV_READY(md_status))  		return -EIO; @@ -241,17 +308,36 @@ static void disable_hdm(void *_cxlhdm)  	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);  } -static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) +int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm)  { -	void __iomem *hdm = cxlhdm->regs.hdm_decoder; +	void __iomem *hdm;  	u32 global_ctrl; +	/* +	 * If the hdm capability was not mapped there is nothing to enable and +	 * the caller is responsible for what happens next.  For example, +	 * emulate a passthrough decoder. +	 */ +	if (IS_ERR(cxlhdm)) +		return 0; + +	hdm = cxlhdm->regs.hdm_decoder;  	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + +	/* +	 * If the HDM decoder capability was enabled on entry, skip +	 * registering disable_hdm() since this decode capability may be +	 * owned by platform firmware. 
+	 */ +	if (global_ctrl & CXL_HDM_DECODER_ENABLE) +		return 0; +  	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,  	       hdm + CXL_HDM_DECODER_CTRL_OFFSET); -	return devm_add_action_or_reset(host, disable_hdm, cxlhdm); +	return devm_add_action_or_reset(&port->dev, disable_hdm, cxlhdm);  } +EXPORT_SYMBOL_NS_GPL(devm_cxl_enable_hdm, CXL);  int cxl_dvsec_rr_decode(struct device *dev, int d,  			struct cxl_endpoint_dvsec_info *info) @@ -425,7 +511,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,  	if (info->mem_enabled)  		return 0; -	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); +	rc = devm_cxl_enable_hdm(port, cxlhdm);  	if (rc)  		return rc; @@ -441,79 +527,33 @@ EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);  #define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff  #define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 -static struct pci_doe_mb *find_cdat_doe(struct device *uport) -{ -	struct cxl_memdev *cxlmd; -	struct cxl_dev_state *cxlds; -	unsigned long index; -	void *entry; - -	cxlmd = to_cxl_memdev(uport); -	cxlds = cxlmd->cxlds; - -	xa_for_each(&cxlds->doe_mbs, index, entry) { -		struct pci_doe_mb *cur = entry; - -		if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL, -					  CXL_DOE_PROTOCOL_TABLE_ACCESS)) -			return cur; -	} - -	return NULL; -} - -#define CDAT_DOE_REQ(entry_handle)					\ +#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\  	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\  		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\  	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\  		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\  	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle))) -static void cxl_doe_task_complete(struct pci_doe_task *task) -{ -	complete(task->private); -} - -struct cdat_doe_task { -	u32 request_pl; -	u32 response_pl[32]; -	struct completion c; -	struct pci_doe_task task; -}; - -#define DECLARE_CDAT_DOE_TASK(req, cdt)                       \ -struct cdat_doe_task cdt = {                                  \ -	.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c),           \ -	.request_pl = req,				      \ -	.task = {                                             \ -		.prot.vid = PCI_DVSEC_VENDOR_ID_CXL,        \ -		.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \ -		.request_pl = &cdt.request_pl,                \ -		.request_pl_sz = sizeof(cdt.request_pl),      \ -		.response_pl = cdt.response_pl,               \ -		.response_pl_sz = sizeof(cdt.response_pl),    \ -		.complete = cxl_doe_task_complete,            \ -		.private = &cdt.c,                            \ -	}                                                     \ -} -  static int cxl_cdat_get_length(struct device *dev,  			       struct pci_doe_mb *cdat_doe,  			       size_t *length)  { -	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t); +	__le32 request = CDAT_DOE_REQ(0); +	__le32 response[2];  	int rc; -	rc = pci_doe_submit_task(cdat_doe, &t.task); +	rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL, +		     CXL_DOE_PROTOCOL_TABLE_ACCESS, +		     &request, sizeof(request), +		     &response, sizeof(response));  	if (rc < 0) { -		dev_err(dev, "DOE submit failed: %d", rc); +		dev_err(dev, "DOE failed: %d", rc);  		return rc;  	} -	wait_for_completion(&t.c); -	if (t.task.rv < sizeof(u32)) +	if (rc < sizeof(response))  		return -EIO; -	*length = t.response_pl[1]; +	*length = le32_to_cpu(response[1]);  	dev_dbg(dev, "CDAT length %zu\n", *length);  	return 0; @@ -521,44 +561,56 @@ static int cxl_cdat_get_length(struct device *dev,  static int cxl_cdat_read_table(struct device *dev,  			       struct 
pci_doe_mb *cdat_doe, -			       struct cxl_cdat *cdat) +			       void *cdat_table, size_t *cdat_length)  { -	size_t length = cdat->length; -	u32 *data = cdat->table; +	size_t length = *cdat_length + sizeof(__le32); +	__le32 *data = cdat_table;  	int entry_handle = 0; +	__le32 saved_dw = 0;  	do { -		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); +		__le32 request = CDAT_DOE_REQ(entry_handle); +		struct cdat_entry_header *entry;  		size_t entry_dw; -		u32 *entry;  		int rc; -		rc = pci_doe_submit_task(cdat_doe, &t.task); +		rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL, +			     CXL_DOE_PROTOCOL_TABLE_ACCESS, +			     &request, sizeof(request), +			     data, length);  		if (rc < 0) { -			dev_err(dev, "DOE submit failed: %d", rc); +			dev_err(dev, "DOE failed: %d", rc);  			return rc;  		} -		wait_for_completion(&t.c); -		/* 1 DW header + 1 DW data min */ -		if (t.task.rv < (2 * sizeof(u32))) + +		/* 1 DW Table Access Response Header + CDAT entry */ +		entry = (struct cdat_entry_header *)(data + 1); +		if ((entry_handle == 0 && +		     rc != sizeof(__le32) + sizeof(struct cdat_header)) || +		    (entry_handle > 0 && +		     (rc < sizeof(__le32) + sizeof(*entry) || +		      rc != sizeof(__le32) + le16_to_cpu(entry->length))))  			return -EIO;  		/* Get the CXL table access header entry handle */  		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, -					 t.response_pl[0]); -		entry = t.response_pl + 1; -		entry_dw = t.task.rv / sizeof(u32); +					 le32_to_cpu(data[0])); +		entry_dw = rc / sizeof(__le32);  		/* Skip Header */  		entry_dw -= 1; -		entry_dw = min(length / sizeof(u32), entry_dw); -		/* Prevent length < 1 DW from causing a buffer overflow */ -		if (entry_dw) { -			memcpy(data, entry, entry_dw * sizeof(u32)); -			length -= entry_dw * sizeof(u32); -			data += entry_dw; -		} +		/* +		 * Table Access Response Header overwrote the last DW of +		 * previous entry, so restore that DW +		 */ +		*data = saved_dw; +		length -= entry_dw * sizeof(__le32); +		data += entry_dw; +		saved_dw = *data;  	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); +	/* Length in CDAT header may exceed concatenation of CDAT entries */ +	*cdat_length -= length - sizeof(__le32); +  	return 0;  } @@ -570,13 +622,19 @@ static int cxl_cdat_read_table(struct device *dev,   */  void read_cdat_data(struct cxl_port *port)  { -	struct pci_doe_mb *cdat_doe; +	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); +	struct device *host = cxlmd->dev.parent;  	struct device *dev = &port->dev; -	struct device *uport = port->uport; +	struct pci_doe_mb *cdat_doe;  	size_t cdat_length; +	void *cdat_table;  	int rc; -	cdat_doe = find_cdat_doe(uport); +	if (!dev_is_pci(host)) +		return; +	cdat_doe = pci_find_doe_mailbox(to_pci_dev(host), +					PCI_DVSEC_VENDOR_ID_CXL, +					CXL_DOE_PROTOCOL_TABLE_ACCESS);  	if (!cdat_doe) {  		dev_dbg(dev, "No CDAT mailbox\n");  		return; @@ -589,19 +647,21 @@ void read_cdat_data(struct cxl_port *port)  		return;  	} -	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); -	if (!port->cdat.table) +	cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32), +				  GFP_KERNEL); +	if (!cdat_table)  		return; -	port->cdat.length = cdat_length; -	rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat); +	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);  	if (rc) {  		/* Don't leave table data allocated on error */ -		devm_kfree(dev, port->cdat.table); -		port->cdat.table = NULL; -		port->cdat.length = 0; +		devm_kfree(dev, cdat_table);  		
dev_err(dev, "CDAT data read error\n"); +		return;  	} + +	port->cdat.table = cdat_table + sizeof(__le32); +	port->cdat.length = cdat_length;  }  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index c2e4b1093788..f8c38d997252 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)  	return is_cxl_nvdimm_bridge(dev);  } -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start) +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)  { -	struct cxl_port *port = find_cxl_root(start); +	struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));  	struct device *dev;  	if (!port) @@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)  	struct device *dev;  	int rc; -	cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); +	cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);  	if (!cxl_nvb)  		return -ENODEV; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8ee6b6e2e2a4..e7c284c890bc 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1,6 +1,5 @@  // SPDX-License-Identifier: GPL-2.0-only  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-lo-hi.h>  #include <linux/memregion.h>  #include <linux/workqueue.h>  #include <linux/debugfs.h> @@ -751,11 +750,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,  	parent_port = parent_dport ? parent_dport->port : NULL;  	if (IS_ERR(port)) { -		dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n", -			dev_name(&port->dev), -			parent_port ? " to " : "", +		dev_dbg(uport, "Failed to add%s%s%s: %ld\n", +			parent_port ? " port to " : "",  			parent_port ? dev_name(&parent_port->dev) : "", -			parent_port ? "" : " (root port)", +			parent_port ? 
"" : " root port",  			PTR_ERR(port));  	} else {  		dev_dbg(uport, "%s added%s%s%s\n", @@ -823,41 +821,17 @@ static bool dev_is_cxl_root_child(struct device *dev)  	return false;  } -/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ -static int match_root_child(struct device *dev, const void *match) +struct cxl_port *find_cxl_root(struct cxl_port *port)  { -	const struct device *iter = NULL; -	struct cxl_dport *dport; -	struct cxl_port *port; - -	if (!dev_is_cxl_root_child(dev)) -		return 0; - -	port = to_cxl_port(dev); -	iter = match; -	while (iter) { -		dport = cxl_find_dport_by_dev(port, iter); -		if (dport) -			break; -		iter = iter->parent; -	} +	struct cxl_port *iter = port; -	return !!iter; -} +	while (iter && !is_cxl_root(iter)) +		iter = to_cxl_port(iter->dev.parent); -struct cxl_port *find_cxl_root(struct device *dev) -{ -	struct device *port_dev; -	struct cxl_port *root; - -	port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); -	if (!port_dev) +	if (!iter)  		return NULL; - -	root = to_cxl_port(port_dev->parent); -	get_device(&root->dev); -	put_device(port_dev); -	return root; +	get_device(&iter->dev); +	return iter;  }  EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); @@ -1927,7 +1901,7 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)  EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);  /* for user tooling to ensure port disable work has completed */ -static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count) +static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count)  {  	if (sysfs_streq(buf, "1")) {  		flush_workqueue(cxl_bus_wq); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f29028148806..f822de44bee0 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)  		struct cxl_endpoint_decoder *cxled = p->targets[i];  		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);  		struct cxl_port *iter = cxled_to_port(cxled); +		struct cxl_dev_state *cxlds = cxlmd->cxlds;  		struct cxl_ep *ep;  		int rc = 0; +		if (cxlds->rcd) +			goto endpoint_reset; +  		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))  			iter = to_cxl_port(iter->dev.parent); @@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)  				return rc;  		} +endpoint_reset:  		rc = cxled->cxld.reset(&cxled->cxld);  		if (rc)  			return rc; @@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)  {  	struct cxl_region_params *p = &cxlr->params;  	struct cxl_endpoint_decoder *cxled; +	struct cxl_dev_state *cxlds;  	struct cxl_memdev *cxlmd;  	struct cxl_port *iter;  	struct cxl_ep *ep; @@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)  	for (i = 0; i < p->nr_targets; i++) {  		cxled = p->targets[i];  		cxlmd = cxled_to_memdev(cxled); +		cxlds = cxlmd->cxlds; + +		if (cxlds->rcd) +			continue;  		iter = cxled_to_port(cxled);  		while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)  {  	struct cxl_region_params *p = &cxlr->params;  	struct cxl_endpoint_decoder *cxled; +	struct cxl_dev_state *cxlds; +	int i, rc, rch = 0, vh = 0;  	struct cxl_memdev *cxlmd;  	struct cxl_port *iter;  	struct cxl_ep *ep; -	int i, rc;  	for (i = 0; i < p->nr_targets; i++) {  		cxled = p->targets[i];  		cxlmd = 
cxled_to_memdev(cxled); +		cxlds = cxlmd->cxlds; + +		/* validate that all targets agree on topology */ +		if (!cxlds->rcd) { +			vh++; +		} else { +			rch++; +			continue; +		}  		iter = cxled_to_port(cxled);  		while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)  		}  	} +	if (rch && vh) { +		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n"); +		cxl_region_teardown_targets(cxlr); +		return -ENXIO; +	} +  	return 0;  } @@ -1648,6 +1674,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,  		if (rc)  			goto err_decrement;  		p->state = CXL_CONFIG_ACTIVE; +		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);  	}  	cxled->cxld.interleave_ways = p->interleave_ways; @@ -1749,8 +1776,6 @@ static int attach_target(struct cxl_region *cxlr,  	down_read(&cxl_dpa_rwsem);  	rc = cxl_region_attach(cxlr, cxled, pos); -	if (rc == 0) -		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);  	up_read(&cxl_dpa_rwsem);  	up_write(&cxl_region_rwsem);  	return rc; @@ -2213,6 +2238,130 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)  }  EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL); +struct cxl_poison_context { +	struct cxl_port *port; +	enum cxl_decoder_mode mode; +	u64 offset; +}; + +static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, +				   struct cxl_poison_context *ctx) +{ +	struct cxl_dev_state *cxlds = cxlmd->cxlds; +	u64 offset, length; +	int rc = 0; + +	/* +	 * Collect poison for the remaining unmapped resources +	 * after poison is collected by committed endpoints. +	 * +	 * Knowing that PMEM must always follow RAM, get poison +	 * for unmapped resources based on the last decoder's mode: +	 *	ram: scan remains of ram range, then any pmem range +	 *	pmem: scan remains of pmem range +	 */ + +	if (ctx->mode == CXL_DECODER_RAM) { +		offset = ctx->offset; +		length = resource_size(&cxlds->ram_res) - offset; +		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); +		if (rc == -EFAULT) +			rc = 0; +		if (rc) +			return rc; +	} +	if (ctx->mode == CXL_DECODER_PMEM) { +		offset = ctx->offset; +		length = resource_size(&cxlds->dpa_res) - offset; +		if (!length) +			return 0; +	} else if (resource_size(&cxlds->pmem_res)) { +		offset = cxlds->pmem_res.start; +		length = resource_size(&cxlds->pmem_res); +	} else { +		return 0; +	} + +	return cxl_mem_get_poison(cxlmd, offset, length, NULL); +} + +static int poison_by_decoder(struct device *dev, void *arg) +{ +	struct cxl_poison_context *ctx = arg; +	struct cxl_endpoint_decoder *cxled; +	struct cxl_memdev *cxlmd; +	u64 offset, length; +	int rc = 0; + +	if (!is_endpoint_decoder(dev)) +		return rc; + +	cxled = to_cxl_endpoint_decoder(dev); +	if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) +		return rc; + +	/* +	 * Regions are only created with single mode decoders: pmem or ram. +	 * Linux does not support mixed mode decoders. This means that +	 * reading poison per endpoint decoder adheres to the requirement +	 * that poison reads of pmem and ram must be separated. 
+	 * CXL 3.0 Spec 8.2.9.8.4.1 +	 */ +	if (cxled->mode == CXL_DECODER_MIXED) { +		dev_dbg(dev, "poison list read unsupported in mixed mode\n"); +		return rc; +	} + +	cxlmd = cxled_to_memdev(cxled); +	if (cxled->skip) { +		offset = cxled->dpa_res->start - cxled->skip; +		length = cxled->skip; +		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); +		if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) +			rc = 0; +		if (rc) +			return rc; +	} + +	offset = cxled->dpa_res->start; +	length = cxled->dpa_res->end - offset + 1; +	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region); +	if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) +		rc = 0; +	if (rc) +		return rc; + +	/* Iterate until commit_end is reached */ +	if (cxled->cxld.id == ctx->port->commit_end) { +		ctx->offset = cxled->dpa_res->end + 1; +		ctx->mode = cxled->mode; +		return 1; +	} + +	return 0; +} + +int cxl_get_poison_by_endpoint(struct cxl_port *port) +{ +	struct cxl_poison_context ctx; +	int rc = 0; + +	rc = down_read_interruptible(&cxl_region_rwsem); +	if (rc) +		return rc; + +	ctx = (struct cxl_poison_context) { +		.port = port +	}; + +	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder); +	if (rc == 1) +		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport), &ctx); + +	up_read(&cxl_region_rwsem); +	return rc; +} +  static struct lock_class_key cxl_pmem_region_key;  static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) @@ -2251,7 +2400,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)  		 * bridge for one device is the same for all.  		 */  		if (i == 0) { -			cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); +			cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);  			if (!cxl_nvb) {  				cxlr_pmem = ERR_PTR(-ENODEV);  				goto out; diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c index 29ae7ce81dc5..d0403dc3c8ab 100644 --- a/drivers/cxl/core/trace.c +++ b/drivers/cxl/core/trace.c @@ -1,5 +1,99 @@  // SPDX-License-Identifier: GPL-2.0-only  /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <cxl.h> +#include "core.h" +  #define CREATE_TRACE_POINTS  #include "trace.h" + +static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) +{ +	struct cxl_region_params *p = &cxlr->params; +	int gran = p->interleave_granularity; +	int ways = p->interleave_ways; +	u64 offset; + +	/* Is the hpa within this region at all */ +	if (hpa < p->res->start || hpa > p->res->end) { +		dev_dbg(&cxlr->dev, +			"Addr trans fail: hpa 0x%llx not in region\n", hpa); +		return false; +	} + +	/* Is the hpa in an expected chunk for its pos(-ition) */ +	offset = hpa - p->res->start; +	offset = do_div(offset, gran * ways); +	if ((offset >= pos * gran) && (offset < (pos + 1) * gran)) +		return true; + +	dev_dbg(&cxlr->dev, +		"Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa); + +	return false; +} + +static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr, +			  struct cxl_endpoint_decoder *cxled) +{ +	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; +	struct cxl_region_params *p = &cxlr->params; +	int pos = cxled->pos; +	u16 eig = 0; +	u8 eiw = 0; + +	ways_to_eiw(p->interleave_ways, &eiw); +	granularity_to_eig(p->interleave_granularity, &eig); + +	/* +	 * The device position in the region interleave set was removed +	 * from the offset at HPA->DPA translation. To reconstruct the +	 * HPA, place the 'pos' in the offset. 
+	 * +	 * The placement of 'pos' in the HPA is determined by interleave +	 * ways and granularity and is defined in the CXL Spec 3.0 Section +	 * 8.2.4.19.13 Implementation Note: Device Decode Logic +	 */ + +	/* Remove the dpa base */ +	dpa_offset = dpa - cxl_dpa_resource_start(cxled); + +	mask_upper = GENMASK_ULL(51, eig + 8); + +	if (eiw < 8) { +		hpa_offset = (dpa_offset & mask_upper) << eiw; +		hpa_offset |= pos << (eig + 8); +	} else { +		bits_upper = (dpa_offset & mask_upper) >> (eig + 8); +		bits_upper = bits_upper * 3; +		hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8); +	} + +	/* The lower bits remain unchanged */ +	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); + +	/* Apply the hpa_offset to the region base address */ +	hpa = hpa_offset + p->res->start; + +	if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) +		return ULLONG_MAX; + +	return hpa; +} + +u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *cxlmd, +		  u64 dpa) +{ +	struct cxl_region_params *p = &cxlr->params; +	struct cxl_endpoint_decoder *cxled = NULL; + +	for (int i = 0; i <  p->nr_targets; i++) { +		cxled = p->targets[i]; +		if (cxlmd == cxled_to_memdev(cxled)) +			break; +	} +	if (!cxled || cxlmd != cxled_to_memdev(cxled)) +		return ULLONG_MAX; + +	return cxl_dpa_to_hpa(dpa, cxlr, cxled); +} diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 9b8d3d997834..a0b5819bc70b 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -7,10 +7,12 @@  #define _CXL_EVENTS_H  #include <linux/tracepoint.h> +#include <linux/pci.h>  #include <asm-generic/unaligned.h>  #include <cxl.h>  #include <cxlmem.h> +#include "core.h"  #define CXL_RAS_UC_CACHE_DATA_PARITY	BIT(0)  #define CXL_RAS_UC_CACHE_ADDR_PARITY	BIT(1) @@ -600,6 +602,107 @@ TRACE_EVENT(cxl_memory_module,  	)  ); +#define show_poison_trace_type(type)			\ +	__print_symbolic(type,				\ +	{ CXL_POISON_TRACE_LIST,	"List"   },	\ +	{ CXL_POISON_TRACE_INJECT,	"Inject" },	\ +	{ CXL_POISON_TRACE_CLEAR,	"Clear"  }) + +#define __show_poison_source(source)                          \ +	__print_symbolic(source,                              \ +		{ CXL_POISON_SOURCE_UNKNOWN,   "Unknown"  },  \ +		{ CXL_POISON_SOURCE_EXTERNAL,  "External" },  \ +		{ CXL_POISON_SOURCE_INTERNAL,  "Internal" },  \ +		{ CXL_POISON_SOURCE_INJECTED,  "Injected" },  \ +		{ CXL_POISON_SOURCE_VENDOR,    "Vendor"   }) + +#define show_poison_source(source)			     \ +	(((source > CXL_POISON_SOURCE_INJECTED) &&	     \ +	 (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved"  \ +	 : __show_poison_source(source)) + +#define show_poison_flags(flags)                             \ +	__print_flags(flags, "|",                            \ +		{ CXL_POISON_FLAG_MORE,      "More"     },   \ +		{ CXL_POISON_FLAG_OVERFLOW,  "Overflow"  },  \ +		{ CXL_POISON_FLAG_SCANNING,  "Scanning"  }) + +#define __cxl_poison_addr(record)					\ +	(le64_to_cpu(record->address)) +#define cxl_poison_record_dpa(record)					\ +	(__cxl_poison_addr(record) & CXL_POISON_START_MASK) +#define cxl_poison_record_source(record)				\ +	(__cxl_poison_addr(record)  & CXL_POISON_SOURCE_MASK) +#define cxl_poison_record_dpa_length(record)				\ +	(le32_to_cpu(record->length) * CXL_POISON_LEN_MULT) +#define cxl_poison_overflow(flags, time)				\ +	(flags & CXL_POISON_FLAG_OVERFLOW ? 
le64_to_cpu(time) : 0) + +u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *memdev, u64 dpa); + +TRACE_EVENT(cxl_poison, + +	TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region, +		 const struct cxl_poison_record *record, u8 flags, +		 __le64 overflow_ts, enum cxl_poison_trace_type trace_type), + +	TP_ARGS(cxlmd, region, record, flags, overflow_ts, trace_type), + +	TP_STRUCT__entry( +		__string(memdev, dev_name(&cxlmd->dev)) +		__string(host, dev_name(cxlmd->dev.parent)) +		__field(u64, serial) +		__field(u8, trace_type) +		__string(region, region) +		__field(u64, overflow_ts) +		__field(u64, hpa) +		__field(u64, dpa) +		__field(u32, dpa_length) +		__array(char, uuid, 16) +		__field(u8, source) +		__field(u8, flags) +	    ), + +	TP_fast_assign( +		__assign_str(memdev, dev_name(&cxlmd->dev)); +		__assign_str(host, dev_name(cxlmd->dev.parent)); +		__entry->serial = cxlmd->cxlds->serial; +		__entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts); +		__entry->dpa = cxl_poison_record_dpa(record); +		__entry->dpa_length = cxl_poison_record_dpa_length(record); +		__entry->source = cxl_poison_record_source(record); +		__entry->trace_type = trace_type; +		__entry->flags = flags; +		if (region) { +			__assign_str(region, dev_name(®ion->dev)); +			memcpy(__entry->uuid, ®ion->params.uuid, 16); +			__entry->hpa = cxl_trace_hpa(region, cxlmd, +						     __entry->dpa); +		} else { +			__assign_str(region, ""); +			memset(__entry->uuid, 0, 16); +			__entry->hpa = ULLONG_MAX; +		} +	    ), + +	TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s "  \ +		"region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x "    \ +		"source=%s flags=%s overflow_time=%llu", +		__get_str(memdev), +		__get_str(host), +		__entry->serial, +		show_poison_trace_type(__entry->trace_type), +		__get_str(region), +		__entry->uuid, +		__entry->hpa, +		__entry->dpa, +		__entry->dpa_length, +		show_poison_source(__entry->source), +		show_poison_flags(__entry->flags), +		__entry->overflow_ts +	) +); +  #endif /* _CXL_EVENTS_H */  #define TRACE_INCLUDE_FILE trace  |
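
The cxl_dpa_to_hpa() addition in trace.c reconstructs a host physical address from a device physical address by re-inserting the endpoint's position in the interleave set into the offset, per the decode logic the comment cites from CXL 3.0 Section 8.2.4.19.13. A minimal, standalone sketch of that math for the power-of-two interleave case follows; the helper name dpa_to_hpa_pow2() and the example values are assumptions for illustration only, not part of the driver.

/*
 * Standalone illustration (userspace, not kernel code) of the HPA
 * reconstruction performed by cxl_dpa_to_hpa() for power-of-two
 * interleave ways (eiw < 8). eiw/eig are the CXL-encoded interleave
 * ways and granularity: the chunk size is 2^(eig + 8) bytes, and the
 * HPA advances by 'ways' chunks for every chunk consumed on one device.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t dpa_to_hpa_pow2(uint64_t dpa, uint64_t dpa_base,
				uint64_t hpa_base, int pos,
				unsigned int eiw, unsigned int eig)
{
	uint64_t dpa_offset = dpa - dpa_base;
	uint64_t chunk = 1ULL << (eig + 8);	   /* interleave granularity in bytes */
	uint64_t low = dpa_offset & (chunk - 1);   /* offset within the current chunk */
	uint64_t high = dpa_offset & ~(chunk - 1); /* whole chunks already consumed */

	/* Scale the chunk count by the interleave ways and slot 'pos' back in */
	return hpa_base + (high << eiw) + ((uint64_t)pos << (eig + 8)) + low;
}

int main(void)
{
	/* 2-way interleave (eiw = 1), 256B granularity (eig = 0), device at pos 1 */
	uint64_t hpa = dpa_to_hpa_pow2(0x1100, 0, 0x100000000ULL, 1, 1, 0);

	printf("hpa = %#llx\n", (unsigned long long)hpa); /* prints 0x100002300 */
	return 0;
}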