diff options
Diffstat (limited to 'fs/nfs/flexfilelayout/flexfilelayout.c')
| -rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 313 | 
1 files changed, 210 insertions, 103 deletions
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace127bf86..9e111d07f667 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -25,9 +25,20 @@  #define NFSDBG_FACILITY         NFSDBG_PNFS_LD  #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ) +#define FF_LAYOUTRETURN_MAXERR 20 +  static struct group_info	*ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, +		struct nfs_pgio_header *hdr); +static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, +			       struct nfs42_layoutstat_devinfo *devinfo, +			       int dev_limit); +static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, +			      const struct nfs42_layoutstat_devinfo *devinfo, +			      struct nfs4_ff_layout_mirror *mirror); +  static struct pnfs_layout_hdr *  ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)  { @@ -172,7 +183,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,  	spin_lock(&inode->i_lock);  	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { -		if (mirror->mirror_ds != pos->mirror_ds) +		if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)  			continue;  		if (!ff_mirror_match_fh(mirror, pos))  			continue; @@ -349,19 +360,6 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)  	}  } -static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) -{ -	struct nfs4_deviceid_node *node; -	int i; - -	if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) -		return; -	for (i = 0; i < fls->mirror_array_cnt; i++) { -		node = &fls->mirror_array[i]->mirror_ds->id_node; -		clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); -	} -} -  static struct pnfs_layout_segment *  ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		     struct nfs4_layoutget_res *lgr, @@ -415,8 +413,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  	for (i = 0; i < fls->mirror_array_cnt; i++) {  		struct nfs4_ff_layout_mirror *mirror; -		struct nfs4_deviceid devid; -		struct nfs4_deviceid_node *idnode;  		struct auth_cred acred = { .group_info = ff_zero_group };  		struct rpc_cred	__rcu *cred;  		u32 ds_count, fh_count, id; @@ -441,24 +437,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		fls->mirror_array[i]->ds_count = ds_count;  		/* deviceid */ -		rc = decode_deviceid(&stream, &devid); +		rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);  		if (rc)  			goto out_err_free; -		idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), -						&devid, lh->plh_lc_cred, -						gfp_flags); -		/* -		 * upon success, mirror_ds is allocated by previous -		 * getdeviceinfo, or newly by .alloc_deviceid_node -		 * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure -		 */ -		if (idnode) -			fls->mirror_array[i]->mirror_ds = -				FF_LAYOUT_MIRROR_DS(idnode); -		else -			goto out_err_free; -  		/* efficiency */  		rc = -EIO;  		p = xdr_inline_decode(&stream, 4); @@ -556,8 +538,6 @@ out_sort_mirrors:  	rc = ff_layout_check_layout(lgr);  	if (rc)  		goto out_err_free; -	ff_layout_mark_devices_valid(fls); -  	ret = &fls->generic_hdr;  	dprintk("<-- %s (success)\n", __func__);  out_free_page: @@ -702,6 +682,7 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode,  	spin_lock(&mirror->lock);  	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);  	nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  	if (report) @@ -718,6 +699,7 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,  	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,  			requested, completed,  			ktime_get(), task->tk_start); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  } @@ -731,6 +713,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode,  	spin_lock(&mirror->lock);  	report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now);  	nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  	if (report) @@ -750,6 +733,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,  	spin_lock(&mirror->lock);  	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,  			requested, completed, ktime_get(), task->tk_start); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  } @@ -1293,6 +1277,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,  					hdr->pgio_mirror_idx + 1,  					&hdr->pgio_mirror_idx))  			goto out_eagain; +		ff_layout_read_record_layoutstats_done(task, hdr);  		pnfs_read_resend_pnfs(hdr);  		return task->tk_status;  	case -NFS4ERR_RESET_TO_MDS: @@ -1961,38 +1946,88 @@ ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)  						  id_node));  } -static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, -				  struct xdr_stream *xdr, -				  const struct nfs4_layoutreturn_args *args) +static int ff_layout_encode_ioerr(struct xdr_stream *xdr, +				  const struct nfs4_layoutreturn_args *args, +				  const struct nfs4_flexfile_layoutreturn_args *ff_args)  { -	struct pnfs_layout_hdr *hdr = &flo->generic_hdr;  	__be32 *start; -	int count = 0, ret = 0;  	start = xdr_reserve_space(xdr, 4);  	if (unlikely(!start))  		return -E2BIG; +	*start = cpu_to_be32(ff_args->num_errors);  	/* This assume we always return _ALL_ layouts */ -	spin_lock(&hdr->plh_inode->i_lock); -	ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); -	spin_unlock(&hdr->plh_inode->i_lock); +	return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors); +} -	*start = cpu_to_be32(count); +static void +encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ +	__be32 *p; -	return ret; +	p = xdr_reserve_space(xdr, len); +	xdr_encode_opaque_fixed(p, buf, len); +} + +static void +ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, +			    const nfs4_stateid *stateid, +			    const struct nfs42_layoutstat_devinfo *devinfo) +{ +	__be32 *p; + +	p = xdr_reserve_space(xdr, 8 + 8); +	p = xdr_encode_hyper(p, devinfo->offset); +	p = xdr_encode_hyper(p, devinfo->length); +	encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); +	p = xdr_reserve_space(xdr, 4*8); +	p = xdr_encode_hyper(p, devinfo->read_count); +	p = xdr_encode_hyper(p, devinfo->read_bytes); +	p = xdr_encode_hyper(p, devinfo->write_count); +	p = xdr_encode_hyper(p, devinfo->write_bytes); +	encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE); +} + +static void +ff_layout_encode_ff_iostat(struct xdr_stream *xdr, +			    const nfs4_stateid *stateid, +			    const struct nfs42_layoutstat_devinfo *devinfo) +{ +	ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo); +	ff_layout_encode_ff_layoutupdate(xdr, devinfo, +			devinfo->ld_private.data);  }  /* report nothing for now */ -static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, -				     struct xdr_stream *xdr, -				     const struct nfs4_layoutreturn_args *args) +static void ff_layout_encode_iostats_array(struct xdr_stream *xdr, +		const struct nfs4_layoutreturn_args *args, +		struct nfs4_flexfile_layoutreturn_args *ff_args)  {  	__be32 *p; +	int i;  	p = xdr_reserve_space(xdr, 4); -	if (likely(p)) -		*p = cpu_to_be32(0); +	*p = cpu_to_be32(ff_args->num_dev); +	for (i = 0; i < ff_args->num_dev; i++) +		ff_layout_encode_ff_iostat(xdr, +				&args->layout->plh_stateid, +				&ff_args->devinfo[i]); +} + +static void +ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo, +		unsigned int num_entries) +{ +	unsigned int i; + +	for (i = 0; i < num_entries; i++) { +		if (!devinfo[i].ld_private.ops) +			continue; +		if (!devinfo[i].ld_private.ops->free) +			continue; +		devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); +	}  }  static struct nfs4_deviceid_node * @@ -2008,24 +2043,91 @@ ff_layout_alloc_deviceid_node(struct nfs_server *server,  }  static void -ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, -			      struct xdr_stream *xdr, -			      const struct nfs4_layoutreturn_args *args) -{ -	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); +ff_layout_encode_layoutreturn(struct xdr_stream *xdr, +		const void *voidargs, +		const struct nfs4_xdr_opaque_data *ff_opaque) +{ +	const struct nfs4_layoutreturn_args *args = voidargs; +	struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data; +	struct xdr_buf tmp_buf = { +		.head = { +			[0] = { +				.iov_base = page_address(ff_args->pages[0]), +			}, +		}, +		.buflen = PAGE_SIZE, +	}; +	struct xdr_stream tmp_xdr;  	__be32 *start;  	dprintk("%s: Begin\n", __func__); -	start = xdr_reserve_space(xdr, 4); -	BUG_ON(!start); -	ff_layout_encode_ioerr(flo, xdr, args); -	ff_layout_encode_iostats(flo, xdr, args); +	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + +	ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); +	ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); + +	start = xdr_reserve_space(xdr, 4); +	*start = cpu_to_be32(tmp_buf.len); +	xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len); -	*start = cpu_to_be32((xdr->p - start - 1) * 4);  	dprintk("%s: Return\n", __func__);  } +static void +ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) +{ +	struct nfs4_flexfile_layoutreturn_args *ff_args; + +	if (!args->data) +		return; +	ff_args = args->data; +	args->data = NULL; + +	ff_layout_free_ds_ioerr(&ff_args->errors); +	ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev); + +	put_page(ff_args->pages[0]); +	kfree(ff_args); +} + +const struct nfs4_xdr_opaque_ops layoutreturn_ops = { +	.encode = ff_layout_encode_layoutreturn, +	.free = ff_layout_free_layoutreturn, +}; + +static int +ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) +{ +	struct nfs4_flexfile_layoutreturn_args *ff_args; +	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout); + +	ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); +	if (!ff_args) +		goto out_nomem; +	ff_args->pages[0] = alloc_page(GFP_KERNEL); +	if (!ff_args->pages[0]) +		goto out_nomem_free; + +	INIT_LIST_HEAD(&ff_args->errors); +	ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout, +			&args->range, &ff_args->errors, +			FF_LAYOUTRETURN_MAXERR); + +	spin_lock(&args->inode->i_lock); +	ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, +			&ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo)); +	spin_unlock(&args->inode->i_lock); + +	args->ld_private->ops = &layoutreturn_ops; +	args->ld_private->data = ff_args; +	return 0; +out_nomem_free: +	kfree(ff_args); +out_nomem: +	return -ENOMEM; +} +  static int  ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)  { @@ -2146,21 +2248,18 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr,  }  static void -ff_layout_encode_layoutstats(struct xdr_stream *xdr, -			     struct nfs42_layoutstat_args *args, -			     struct nfs42_layoutstat_devinfo *devinfo) +ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, +			      const struct nfs42_layoutstat_devinfo *devinfo, +			      struct nfs4_ff_layout_mirror *mirror)  { -	struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;  	struct nfs4_pnfs_ds_addr *da;  	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;  	struct nfs_fh *fh = &mirror->fh_versions[0]; -	__be32 *p, *start; +	__be32 *p;  	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);  	dprintk("%s: DS %s: encoding address %s\n",  		__func__, ds->ds_remotestr, da->da_remotestr); -	/* layoutupdate length */ -	start = xdr_reserve_space(xdr, 4);  	/* netaddr4 */  	ff_layout_encode_netaddr(xdr, da);  	/* nfs_fh4 */ @@ -2177,42 +2276,71 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr,  	/* bool */  	p = xdr_reserve_space(xdr, 4);  	*p = cpu_to_be32(false); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args, +			     const struct nfs4_xdr_opaque_data *opaque) +{ +	struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque, +			struct nfs42_layoutstat_devinfo, ld_private); +	__be32 *start; + +	/* layoutupdate length */ +	start = xdr_reserve_space(xdr, 4); +	ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);  	*start = cpu_to_be32((xdr->p - start - 1) * 4);  } +static void +ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) +{ +	struct nfs4_ff_layout_mirror *mirror = opaque->data; + +	ff_layout_put_mirror(mirror); +} + +static const struct nfs4_xdr_opaque_ops layoutstat_ops = { +	.encode = ff_layout_encode_layoutstats, +	.free	= ff_layout_free_layoutstats, +}; +  static int -ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, -			       struct pnfs_layout_hdr *lo, +ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, +			       struct nfs42_layoutstat_devinfo *devinfo,  			       int dev_limit)  {  	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);  	struct nfs4_ff_layout_mirror *mirror;  	struct nfs4_deviceid_node *dev; -	struct nfs42_layoutstat_devinfo *devinfo;  	int i = 0;  	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {  		if (i >= dev_limit)  			break; -		if (!mirror->mirror_ds) +		if (IS_ERR_OR_NULL(mirror->mirror_ds)) +			continue; +		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))  			continue;  		/* mirror refcount put in cleanup_layoutstats */  		if (!atomic_inc_not_zero(&mirror->ref))  			continue;  		dev = &mirror->mirror_ds->id_node;  -		devinfo = &args->devinfo[i];  		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);  		devinfo->offset = 0;  		devinfo->length = NFS4_MAX_UINT64; +		spin_lock(&mirror->lock);  		devinfo->read_count = mirror->read_stat.io_stat.ops_completed;  		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;  		devinfo->write_count = mirror->write_stat.io_stat.ops_completed;  		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; +		spin_unlock(&mirror->lock);  		devinfo->layout_type = LAYOUT_FLEX_FILES; -		devinfo->layoutstats_encode = ff_layout_encode_layoutstats; -		devinfo->layout_private = mirror; +		devinfo->ld_private.ops = &layoutstat_ops; +		devinfo->ld_private.data = mirror; +		devinfo++;  		i++;  	}  	return i; @@ -2222,47 +2350,27 @@ static int  ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)  {  	struct nfs4_flexfile_layout *ff_layout; -	struct nfs4_ff_layout_mirror *mirror; -	int dev_count = 0; +	const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; -	spin_lock(&args->inode->i_lock); -	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); -	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { -		if (atomic_read(&mirror->ref) != 0) -			dev_count ++; -	} -	spin_unlock(&args->inode->i_lock);  	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ -	if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { -		dprintk("%s: truncating devinfo to limit (%d:%d)\n", -			__func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); -		dev_count = PNFS_LAYOUTSTATS_MAXDEV; -	}  	args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);  	if (!args->devinfo)  		return -ENOMEM;  	spin_lock(&args->inode->i_lock); -	args->num_dev = ff_layout_mirror_prepare_stats(args, -			&ff_layout->generic_hdr, dev_count); +	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); +	args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, +			&args->devinfo[0], dev_count);  	spin_unlock(&args->inode->i_lock); +	if (!args->num_dev) { +		kfree(args->devinfo); +		args->devinfo = NULL; +		return -ENOENT; +	}  	return 0;  } -static void -ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) -{ -	struct nfs4_ff_layout_mirror *mirror; -	int i; - -	for (i = 0; i < data->args.num_dev; i++) { -		mirror = data->args.devinfo[i].layout_private; -		data->args.devinfo[i].layout_private = NULL; -		ff_layout_put_mirror(mirror); -	} -} -  static struct pnfs_layoutdriver_type flexfilelayout_type = {  	.id			= LAYOUT_FLEX_FILES,  	.name			= "LAYOUT_FLEX_FILES", @@ -2284,10 +2392,9 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {  	.read_pagelist		= ff_layout_read_pagelist,  	.write_pagelist		= ff_layout_write_pagelist,  	.alloc_deviceid_node    = ff_layout_alloc_deviceid_node, -	.encode_layoutreturn    = ff_layout_encode_layoutreturn, +	.prepare_layoutreturn   = ff_layout_prepare_layoutreturn,  	.sync			= pnfs_nfs_generic_sync,  	.prepare_layoutstats	= ff_layout_prepare_layoutstats, -	.cleanup_layoutstats	= ff_layout_cleanup_layoutstats,  };  static int __init nfs4flexfilelayout_init(void)  |