Diffstat (limited to 'drivers/infiniband/core/nldev.c')
| -rw-r--r-- | drivers/infiniband/core/nldev.c | 394 | 
1 file changed, 388 insertions, 6 deletions
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0dcd1aa6f683..fa8655e3b3ed 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -31,6 +31,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
 #include <net/netlink.h>
 #include <rdma/rdma_netlink.h>
 
@@ -52,16 +54,42 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+					     .len = 16 },
+	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
+						    .len = TASK_COMM_LEN },
 };
 
-static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
 {
-	char fw[IB_FW_VERSION_NAME_MAX];
-
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
 		return -EMSGSIZE;
 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
 		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+	char fw[IB_FW_VERSION_NAME_MAX];
+
+	if (fill_nldev_handle(msg, device))
+		return -EMSGSIZE;
+
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
 		return -EMSGSIZE;
 
@@ -92,10 +120,9 @@ static int fill_port_info(struct sk_buff *msg,
 	struct ib_port_attr attr;
 	int ret;
 
-	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
-		return -EMSGSIZE;
-	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+	if (fill_nldev_handle(msg, device))
 		return -EMSGSIZE;
+
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
 		return -EMSGSIZE;
 
@@ -126,6 +153,137 @@ static int fill_port_info(struct sk_buff *msg,
 	return 0;
 }
 
+static int fill_res_info_entry(struct sk_buff *msg,
+			       const char *name, u64 curr)
+{
+	struct nlattr *entry_attr;
+
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
+	if (!entry_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
+		goto err;
+	if (nla_put_u64_64bit(msg,
+			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
+		goto err;
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+	return -EMSGSIZE;
+}
+
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
+{
+	static const char * const names[RDMA_RESTRACK_MAX] = {
+		[RDMA_RESTRACK_PD] = "pd",
+		[RDMA_RESTRACK_CQ] = "cq",
+		[RDMA_RESTRACK_QP] = "qp",
+	};
+
+	struct rdma_restrack_root *res = &device->res;
+	struct nlattr *table_attr;
+	int ret, i, curr;
+
+	if (fill_nldev_handle(msg, device))
+		return -EMSGSIZE;
+
+	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
+	if (!table_attr)
+		return -EMSGSIZE;
+
+	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
+		if (!names[i])
+			continue;
+		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
+		ret = fill_res_info_entry(msg, names[i], curr);
+		if (ret)
+			goto err;
+	}
+
+	nla_nest_end(msg, table_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, table_attr);
+	return ret;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg,
+			     struct ib_qp *qp, uint32_t port)
+{
+	struct rdma_restrack_entry *res = &qp->res;
+	struct ib_qp_init_attr qp_init_attr;
+	struct nlattr *entry_attr;
+	struct ib_qp_attr qp_attr;
+	int ret;
+
+	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
+	if (ret)
+		return ret;
+
+	if (port && port != qp_attr.port_num)
+		return 0;
+
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+	if (!entry_attr)
+		goto out;
+
+	/* In create_qp() port is not set yet */
+	if (qp_attr.port_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
+		goto err;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
+		goto err;
+	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
+				qp_attr.dest_qp_num))
+			goto err;
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
+				qp_attr.rq_psn))
+			goto err;
+	}
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
+		goto err;
+
+	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
+	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
+		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
+			       qp_attr.path_mig_state))
+			goto err;
+	}
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
+		goto err;
+
+	/*
+	 * Existence of task means that this is a user QP; the netlink
+	 * user is invited to read /proc/PID/comm to get the task name.
+	 * Kernel QPs have no task and report res->kern_name instead.
+	 */
+	if (rdma_is_kernel_res(res)) {
+		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+			goto err;
+	} else {
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+			goto err;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+out:
+	return -EMSGSIZE;
+}
+
 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -321,6 +479,213 @@ out:
 	return skb->len;
 }
 
+static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 index;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto err;
+
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+			0, 0);
+
+	ret = fill_res_info(msg, device);
+	if (ret)
+		goto err_free;
+
+	nlmsg_end(msg, nlh);
+	put_device(&device->dev);
+	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+	nlmsg_free(msg);
+err:
+	put_device(&device->dev);
+	return ret;
+}
+
+static int _nldev_res_get_dumpit(struct ib_device *device,
+				 struct sk_buff *skb,
+				 struct netlink_callback *cb,
+				 unsigned int idx)
+{
+	int start = cb->args[0];
+	struct nlmsghdr *nlh;
+
+	if (idx < start)
+		return 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+			0, NLM_F_MULTI);
+
+	if (fill_res_info(skb, device)) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+	idx++;
+
+out:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nldev_res_get_dumpit(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
+}
+
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct rdma_restrack_entry *res;
+	int err, ret = 0, idx = 0;
+	struct nlattr *table_attr;
+	struct ib_device *device;
+	int start = cb->args[0];
+	struct ib_qp *qp = NULL;
+	struct nlmsghdr *nlh;
+	u32 index, port = 0;
+
+	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, NULL);
+	/*
+	 * Right now we require the device index to get QP information,
+	 * but it is possible to extend this code to return all devices in
+	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX;
+	 * if it doesn't exist, we will iterate over all devices.
+	 *
+	 * But it is not needed for now.
+	 */
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	/*
+	 * If no PORT_INDEX is supplied, we will return all QPs from that device
+	 */
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		if (!rdma_is_port_valid(device, port)) {
+			ret = -EINVAL;
+			goto err_index;
+		}
+	}
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+			0, NLM_F_MULTI);
+
+	if (fill_nldev_handle(skb, device)) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+	if (!table_attr) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	down_read(&device->res.rwsem);
+	hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+		if (idx < start)
+			goto next;
+
+		if ((rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != &init_pid_ns) ||
+		    (!rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+			/*
+			 * 1. Kernel QPs should be visible in init namespace only
+			 * 2. Present only QPs visible in the current namespace
+			 */
+			goto next;
+
+		if (!rdma_restrack_get(res))
+			/*
+			 * Resource is under release now, but we are not
+			 * releasing the lock, so it will be released in
+			 * our next pass, once we get the ->next pointer.
+			 */
+			goto next;
+
+		qp = container_of(res, struct ib_qp, res);
+
+		up_read(&device->res.rwsem);
+		ret = fill_res_qp_entry(skb, qp, port);
+		down_read(&device->res.rwsem);
+		/*
+		 * Return the resource; it won't actually be released
+		 * till &device->res.rwsem is taken for write.
+		 */
+		rdma_restrack_put(res);
+
+		if (ret == -EMSGSIZE)
+			/*
+			 * There is a chance to optimize here.
+			 * It can be done by using list_prepare_entry
+			 * and list_for_each_entry_continue afterwards.
+			 */
+			break;
+		if (ret)
+			goto res_err;
+next:		idx++;
+	}
+	up_read(&device->res.rwsem);
+
+	nla_nest_end(skb, table_attr);
+	nlmsg_end(skb, nlh);
+	cb->args[0] = idx;
+
+	/*
+	 * No more QPs to fill, cancel the message and
+	 * return 0 to mark end of dumpit.
+	 */
+	if (!qp)
+		goto err;
+
+	put_device(&device->dev);
+	return skb->len;
+
+res_err:
+	nla_nest_cancel(skb, table_attr);
+	up_read(&device->res.rwsem);
+
+err:
+	nlmsg_cancel(skb, nlh);
+
+err_index:
+	put_device(&device->dev);
+	return ret;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -330,6 +695,23 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.doit = nldev_port_get_doit,
 		.dump = nldev_port_get_dumpit,
 	},
+	[RDMA_NLDEV_CMD_RES_GET] = {
+		.doit = nldev_res_get_doit,
+		.dump = nldev_res_get_dumpit,
+	},
+	[RDMA_NLDEV_CMD_RES_QP_GET] = {
+		.dump = nldev_res_get_qp_dumpit,
+		/*
+		 * .doit is not implemented yet for two reasons:
+		 * 1. It is not needed yet.
+		 * 2. There is a need to provide an identifier; while it is
+		 * easy for QPs (device index + port index + LQPN), it is not
+		 * the case for the rest of the resources (PD and CQ). Because
+		 * it is better to provide a similar interface for all
+		 * resources, let's wait till we have the other resources
+		 * implemented too.
+		 */
+	},
 };
 
 void __init nldev_init(void)
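To make the new message layout concrete, here is a minimal userspace sketch (not part of the patch) that issues the RDMA_NLDEV_CMD_RES_GET doit for one device and walks the nested RDMA_NLDEV_ATTR_RES_SUMMARY table. It assumes libnl-3 plus the installed uapi headers, which provide NETLINK_RDMA, RDMA_NL_GET_TYPE(), RDMA_NL_NLDEV and the nldev enums; the file layout, summary_cb() helper and trimmed error handling are illustrative only, not part of any existing tool.

/*
 * Sketch: query RDMA_NLDEV_CMD_RES_GET for one device and print the
 * nested resource summary ("pd"/"cq"/"qp" plus current count).
 * Assumes libnl-3 and kernel uapi headers; error handling trimmed.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <linux/netlink.h>		/* NETLINK_RDMA */
#include <rdma/rdma_netlink.h>		/* RDMA_NL_GET_TYPE(), nldev enums */

static int summary_cb(struct nl_msg *msg, void *arg)
{
	struct nlmsghdr *hdr = nlmsg_hdr(msg);
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = { 0 };
	struct nlattr *entry;
	int rem;

	(void)arg;
	/* nldev replies carry attributes directly after nlmsghdr (hdrlen 0) */
	nla_parse(tb, RDMA_NLDEV_ATTR_MAX - 1, nlmsg_attrdata(hdr, 0),
		  nlmsg_attrlen(hdr, 0), NULL);

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME])
		printf("%s:\n", nla_get_string(tb[RDMA_NLDEV_ATTR_DEV_NAME]));
	if (!tb[RDMA_NLDEV_ATTR_RES_SUMMARY])
		return NL_OK;

	/* Each nested summary entry pairs a name with a current count */
	nla_for_each_nested(entry, tb[RDMA_NLDEV_ATTR_RES_SUMMARY], rem) {
		struct nlattr *e[RDMA_NLDEV_ATTR_MAX] = { 0 };

		nla_parse_nested(e, RDMA_NLDEV_ATTR_MAX - 1, entry, NULL);
		if (e[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] &&
		    e[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR])
			printf("  %s: %llu\n",
			       nla_get_string(e[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]),
			       (unsigned long long)nla_get_u64(e[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]));
	}
	return NL_OK;
}

int main(int argc, char **argv)
{
	uint32_t index = argc > 1 ? (uint32_t)atoi(argv[1]) : 0; /* device index */
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();

	if (!sk || !msg || nl_connect(sk, NETLINK_RDMA))
		return 1;
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, summary_cb, NULL);

	/* Build the doit request: command + RDMA_NLDEV_ATTR_DEV_INDEX */
	nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
		  RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
		  0, NLM_F_REQUEST);
	nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, index);

	nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_recvmsgs_default(sk);	/* reply is handled in summary_cb() */

	nl_socket_free(sk);
	return 0;
}

Building with something like cc res_get.c $(pkg-config --cflags --libs libnl-3.0) should be enough. The RDMA_NLDEV_CMD_RES_QP_GET side is consumed the same way, except the request carries NLM_F_DUMP (plus an optional RDMA_NLDEV_ATTR_PORT_INDEX) and the reply nests per-QP RDMA_NLDEV_ATTR_RES_QP_ENTRY attributes inside RDMA_NLDEV_ATTR_RES_QP; this is the interface the iproute2 `rdma resource show` commands are built on.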