diff options
Diffstat (limited to 'fs/nfs/flexfilelayout')
| -rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 322 | ||||
| -rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.h | 23 | ||||
| -rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayoutdev.c | 153 | 
3 files changed, 340 insertions, 158 deletions
| diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace127bf86..0ca4af8cca5d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -25,9 +25,20 @@  #define NFSDBG_FACILITY         NFSDBG_PNFS_LD  #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ) +#define FF_LAYOUTRETURN_MAXERR 20 +  static struct group_info	*ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, +		struct nfs_pgio_header *hdr); +static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, +			       struct nfs42_layoutstat_devinfo *devinfo, +			       int dev_limit); +static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, +			      const struct nfs42_layoutstat_devinfo *devinfo, +			      struct nfs4_ff_layout_mirror *mirror); +  static struct pnfs_layout_hdr *  ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)  { @@ -172,7 +183,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,  	spin_lock(&inode->i_lock);  	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { -		if (mirror->mirror_ds != pos->mirror_ds) +		if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)  			continue;  		if (!ff_mirror_match_fh(mirror, pos))  			continue; @@ -349,19 +360,6 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)  	}  } -static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) -{ -	struct nfs4_deviceid_node *node; -	int i; - -	if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) -		return; -	for (i = 0; i < fls->mirror_array_cnt; i++) { -		node = &fls->mirror_array[i]->mirror_ds->id_node; -		clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); -	} -} -  static struct pnfs_layout_segment *  ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		     struct nfs4_layoutget_res *lgr, @@ -415,8 +413,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  	for (i = 0; i < fls->mirror_array_cnt; i++) {  		struct nfs4_ff_layout_mirror *mirror; -		struct nfs4_deviceid devid; -		struct nfs4_deviceid_node *idnode;  		struct auth_cred acred = { .group_info = ff_zero_group };  		struct rpc_cred	__rcu *cred;  		u32 ds_count, fh_count, id; @@ -441,24 +437,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		fls->mirror_array[i]->ds_count = ds_count;  		/* deviceid */ -		rc = decode_deviceid(&stream, &devid); +		rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);  		if (rc)  			goto out_err_free; -		idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), -						&devid, lh->plh_lc_cred, -						gfp_flags); -		/* -		 * upon success, mirror_ds is allocated by previous -		 * getdeviceinfo, or newly by .alloc_deviceid_node -		 * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure -		 */ -		if (idnode) -			fls->mirror_array[i]->mirror_ds = -				FF_LAYOUT_MIRROR_DS(idnode); -		else -			goto out_err_free; -  		/* efficiency */  		rc = -EIO;  		p = xdr_inline_decode(&stream, 4); @@ -556,8 +538,6 @@ out_sort_mirrors:  	rc = ff_layout_check_layout(lgr);  	if (rc)  		goto out_err_free; -	ff_layout_mark_devices_valid(fls); -  	ret = &fls->generic_hdr;  	dprintk("<-- %s (success)\n", __func__);  out_free_page: @@ -639,12 +619,11 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,  			    struct nfs4_ff_layoutstat *layoutstat,  			    ktime_t now)  { -	static const ktime_t notime = {0};  	s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;  	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);  	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); -	if (ktime_equal(mirror->start_time, notime)) +	if (!mirror->start_time)  		mirror->start_time = now;  	if (mirror->report_interval != 0)  		report_interval = (s64)mirror->report_interval * 1000LL; @@ -702,6 +681,7 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode,  	spin_lock(&mirror->lock);  	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);  	nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  	if (report) @@ -718,6 +698,7 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,  	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,  			requested, completed,  			ktime_get(), task->tk_start); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  } @@ -731,6 +712,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode,  	spin_lock(&mirror->lock);  	report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now);  	nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  	if (report) @@ -750,6 +732,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,  	spin_lock(&mirror->lock);  	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,  			requested, completed, ktime_get(), task->tk_start); +	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);  	spin_unlock(&mirror->lock);  } @@ -1142,7 +1125,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,  	case -EPIPE:  		dprintk("%s DS connection error %d\n", __func__,  			task->tk_status); -		nfs4_mark_deviceid_unavailable(devid); +		nfs4_delete_deviceid(devid->ld, devid->nfs_client, +				&devid->deviceid);  		rpc_wake_up(&tbl->slot_tbl_waitq);  		/* fall through */  	default: @@ -1191,7 +1175,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,  	default:  		dprintk("%s DS connection error %d\n", __func__,  			task->tk_status); -		nfs4_mark_deviceid_unavailable(devid); +		nfs4_delete_deviceid(devid->ld, devid->nfs_client, +				&devid->deviceid);  	}  	/* FIXME: Need to prevent infinite looping here. */  	return -NFS4ERR_RESET_TO_PNFS; @@ -1293,6 +1278,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,  					hdr->pgio_mirror_idx + 1,  					&hdr->pgio_mirror_idx))  			goto out_eagain; +		ff_layout_read_record_layoutstats_done(task, hdr);  		pnfs_read_resend_pnfs(hdr);  		return task->tk_status;  	case -NFS4ERR_RESET_TO_MDS: @@ -1961,38 +1947,88 @@ ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)  						  id_node));  } -static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, -				  struct xdr_stream *xdr, -				  const struct nfs4_layoutreturn_args *args) +static int ff_layout_encode_ioerr(struct xdr_stream *xdr, +				  const struct nfs4_layoutreturn_args *args, +				  const struct nfs4_flexfile_layoutreturn_args *ff_args)  { -	struct pnfs_layout_hdr *hdr = &flo->generic_hdr;  	__be32 *start; -	int count = 0, ret = 0;  	start = xdr_reserve_space(xdr, 4);  	if (unlikely(!start))  		return -E2BIG; +	*start = cpu_to_be32(ff_args->num_errors);  	/* This assume we always return _ALL_ layouts */ -	spin_lock(&hdr->plh_inode->i_lock); -	ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); -	spin_unlock(&hdr->plh_inode->i_lock); +	return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors); +} -	*start = cpu_to_be32(count); +static void +encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ +	__be32 *p; -	return ret; +	p = xdr_reserve_space(xdr, len); +	xdr_encode_opaque_fixed(p, buf, len); +} + +static void +ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, +			    const nfs4_stateid *stateid, +			    const struct nfs42_layoutstat_devinfo *devinfo) +{ +	__be32 *p; + +	p = xdr_reserve_space(xdr, 8 + 8); +	p = xdr_encode_hyper(p, devinfo->offset); +	p = xdr_encode_hyper(p, devinfo->length); +	encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); +	p = xdr_reserve_space(xdr, 4*8); +	p = xdr_encode_hyper(p, devinfo->read_count); +	p = xdr_encode_hyper(p, devinfo->read_bytes); +	p = xdr_encode_hyper(p, devinfo->write_count); +	p = xdr_encode_hyper(p, devinfo->write_bytes); +	encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE); +} + +static void +ff_layout_encode_ff_iostat(struct xdr_stream *xdr, +			    const nfs4_stateid *stateid, +			    const struct nfs42_layoutstat_devinfo *devinfo) +{ +	ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo); +	ff_layout_encode_ff_layoutupdate(xdr, devinfo, +			devinfo->ld_private.data);  }  /* report nothing for now */ -static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, -				     struct xdr_stream *xdr, -				     const struct nfs4_layoutreturn_args *args) +static void ff_layout_encode_iostats_array(struct xdr_stream *xdr, +		const struct nfs4_layoutreturn_args *args, +		struct nfs4_flexfile_layoutreturn_args *ff_args)  {  	__be32 *p; +	int i;  	p = xdr_reserve_space(xdr, 4); -	if (likely(p)) -		*p = cpu_to_be32(0); +	*p = cpu_to_be32(ff_args->num_dev); +	for (i = 0; i < ff_args->num_dev; i++) +		ff_layout_encode_ff_iostat(xdr, +				&args->layout->plh_stateid, +				&ff_args->devinfo[i]); +} + +static void +ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo, +		unsigned int num_entries) +{ +	unsigned int i; + +	for (i = 0; i < num_entries; i++) { +		if (!devinfo[i].ld_private.ops) +			continue; +		if (!devinfo[i].ld_private.ops->free) +			continue; +		devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); +	}  }  static struct nfs4_deviceid_node * @@ -2008,24 +2044,91 @@ ff_layout_alloc_deviceid_node(struct nfs_server *server,  }  static void -ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, -			      struct xdr_stream *xdr, -			      const struct nfs4_layoutreturn_args *args) -{ -	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); +ff_layout_encode_layoutreturn(struct xdr_stream *xdr, +		const void *voidargs, +		const struct nfs4_xdr_opaque_data *ff_opaque) +{ +	const struct nfs4_layoutreturn_args *args = voidargs; +	struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data; +	struct xdr_buf tmp_buf = { +		.head = { +			[0] = { +				.iov_base = page_address(ff_args->pages[0]), +			}, +		}, +		.buflen = PAGE_SIZE, +	}; +	struct xdr_stream tmp_xdr;  	__be32 *start;  	dprintk("%s: Begin\n", __func__); -	start = xdr_reserve_space(xdr, 4); -	BUG_ON(!start); -	ff_layout_encode_ioerr(flo, xdr, args); -	ff_layout_encode_iostats(flo, xdr, args); +	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + +	ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); +	ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); + +	start = xdr_reserve_space(xdr, 4); +	*start = cpu_to_be32(tmp_buf.len); +	xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len); -	*start = cpu_to_be32((xdr->p - start - 1) * 4);  	dprintk("%s: Return\n", __func__);  } +static void +ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) +{ +	struct nfs4_flexfile_layoutreturn_args *ff_args; + +	if (!args->data) +		return; +	ff_args = args->data; +	args->data = NULL; + +	ff_layout_free_ds_ioerr(&ff_args->errors); +	ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev); + +	put_page(ff_args->pages[0]); +	kfree(ff_args); +} + +const struct nfs4_xdr_opaque_ops layoutreturn_ops = { +	.encode = ff_layout_encode_layoutreturn, +	.free = ff_layout_free_layoutreturn, +}; + +static int +ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) +{ +	struct nfs4_flexfile_layoutreturn_args *ff_args; +	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout); + +	ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); +	if (!ff_args) +		goto out_nomem; +	ff_args->pages[0] = alloc_page(GFP_KERNEL); +	if (!ff_args->pages[0]) +		goto out_nomem_free; + +	INIT_LIST_HEAD(&ff_args->errors); +	ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout, +			&args->range, &ff_args->errors, +			FF_LAYOUTRETURN_MAXERR); + +	spin_lock(&args->inode->i_lock); +	ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, +			&ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo)); +	spin_unlock(&args->inode->i_lock); + +	args->ld_private->ops = &layoutreturn_ops; +	args->ld_private->data = ff_args; +	return 0; +out_nomem_free: +	kfree(ff_args); +out_nomem: +	return -ENOMEM; +} +  static int  ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)  { @@ -2146,21 +2249,18 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr,  }  static void -ff_layout_encode_layoutstats(struct xdr_stream *xdr, -			     struct nfs42_layoutstat_args *args, -			     struct nfs42_layoutstat_devinfo *devinfo) +ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, +			      const struct nfs42_layoutstat_devinfo *devinfo, +			      struct nfs4_ff_layout_mirror *mirror)  { -	struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;  	struct nfs4_pnfs_ds_addr *da;  	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;  	struct nfs_fh *fh = &mirror->fh_versions[0]; -	__be32 *p, *start; +	__be32 *p;  	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);  	dprintk("%s: DS %s: encoding address %s\n",  		__func__, ds->ds_remotestr, da->da_remotestr); -	/* layoutupdate length */ -	start = xdr_reserve_space(xdr, 4);  	/* netaddr4 */  	ff_layout_encode_netaddr(xdr, da);  	/* nfs_fh4 */ @@ -2177,42 +2277,71 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr,  	/* bool */  	p = xdr_reserve_space(xdr, 4);  	*p = cpu_to_be32(false); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args, +			     const struct nfs4_xdr_opaque_data *opaque) +{ +	struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque, +			struct nfs42_layoutstat_devinfo, ld_private); +	__be32 *start; + +	/* layoutupdate length */ +	start = xdr_reserve_space(xdr, 4); +	ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);  	*start = cpu_to_be32((xdr->p - start - 1) * 4);  } +static void +ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) +{ +	struct nfs4_ff_layout_mirror *mirror = opaque->data; + +	ff_layout_put_mirror(mirror); +} + +static const struct nfs4_xdr_opaque_ops layoutstat_ops = { +	.encode = ff_layout_encode_layoutstats, +	.free	= ff_layout_free_layoutstats, +}; +  static int -ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, -			       struct pnfs_layout_hdr *lo, +ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, +			       struct nfs42_layoutstat_devinfo *devinfo,  			       int dev_limit)  {  	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);  	struct nfs4_ff_layout_mirror *mirror;  	struct nfs4_deviceid_node *dev; -	struct nfs42_layoutstat_devinfo *devinfo;  	int i = 0;  	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {  		if (i >= dev_limit)  			break; -		if (!mirror->mirror_ds) +		if (IS_ERR_OR_NULL(mirror->mirror_ds)) +			continue; +		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))  			continue;  		/* mirror refcount put in cleanup_layoutstats */  		if (!atomic_inc_not_zero(&mirror->ref))  			continue;  		dev = &mirror->mirror_ds->id_node;  -		devinfo = &args->devinfo[i];  		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);  		devinfo->offset = 0;  		devinfo->length = NFS4_MAX_UINT64; +		spin_lock(&mirror->lock);  		devinfo->read_count = mirror->read_stat.io_stat.ops_completed;  		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;  		devinfo->write_count = mirror->write_stat.io_stat.ops_completed;  		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; +		spin_unlock(&mirror->lock);  		devinfo->layout_type = LAYOUT_FLEX_FILES; -		devinfo->layoutstats_encode = ff_layout_encode_layoutstats; -		devinfo->layout_private = mirror; +		devinfo->ld_private.ops = &layoutstat_ops; +		devinfo->ld_private.data = mirror; +		devinfo++;  		i++;  	}  	return i; @@ -2222,47 +2351,27 @@ static int  ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)  {  	struct nfs4_flexfile_layout *ff_layout; -	struct nfs4_ff_layout_mirror *mirror; -	int dev_count = 0; +	const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; -	spin_lock(&args->inode->i_lock); -	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); -	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { -		if (atomic_read(&mirror->ref) != 0) -			dev_count ++; -	} -	spin_unlock(&args->inode->i_lock);  	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ -	if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { -		dprintk("%s: truncating devinfo to limit (%d:%d)\n", -			__func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); -		dev_count = PNFS_LAYOUTSTATS_MAXDEV; -	}  	args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);  	if (!args->devinfo)  		return -ENOMEM;  	spin_lock(&args->inode->i_lock); -	args->num_dev = ff_layout_mirror_prepare_stats(args, -			&ff_layout->generic_hdr, dev_count); +	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); +	args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, +			&args->devinfo[0], dev_count);  	spin_unlock(&args->inode->i_lock); +	if (!args->num_dev) { +		kfree(args->devinfo); +		args->devinfo = NULL; +		return -ENOENT; +	}  	return 0;  } -static void -ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) -{ -	struct nfs4_ff_layout_mirror *mirror; -	int i; - -	for (i = 0; i < data->args.num_dev; i++) { -		mirror = data->args.devinfo[i].layout_private; -		data->args.devinfo[i].layout_private = NULL; -		ff_layout_put_mirror(mirror); -	} -} -  static struct pnfs_layoutdriver_type flexfilelayout_type = {  	.id			= LAYOUT_FLEX_FILES,  	.name			= "LAYOUT_FLEX_FILES", @@ -2284,10 +2393,9 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {  	.read_pagelist		= ff_layout_read_pagelist,  	.write_pagelist		= ff_layout_write_pagelist,  	.alloc_deviceid_node    = ff_layout_alloc_deviceid_node, -	.encode_layoutreturn    = ff_layout_encode_layoutreturn, +	.prepare_layoutreturn   = ff_layout_prepare_layoutreturn,  	.sync			= pnfs_nfs_generic_sync,  	.prepare_layoutstats	= ff_layout_prepare_layoutstats, -	.cleanup_layoutstats	= ff_layout_cleanup_layoutstats,  };  static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 3ee0c9fcea76..f4f39b0ab09b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -21,6 +21,7 @@  /* LAYOUTSTATS report interval in ms */  #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) +#define FF_LAYOUTSTATS_MAXDEV 4  struct nfs4_ff_ds_version {  	u32				version; @@ -73,6 +74,7 @@ struct nfs4_ff_layout_mirror {  	struct list_head		mirrors;  	u32				ds_count;  	u32				efficiency; +	struct nfs4_deviceid		devid;  	struct nfs4_ff_layout_ds	*mirror_ds;  	u32				fh_versions_cnt;  	struct nfs_fh			*fh_versions; @@ -81,12 +83,15 @@ struct nfs4_ff_layout_mirror {  	struct rpc_cred	__rcu		*rw_cred;  	atomic_t			ref;  	spinlock_t			lock; +	unsigned long			flags;  	struct nfs4_ff_layoutstat	read_stat;  	struct nfs4_ff_layoutstat	write_stat;  	ktime_t				start_time;  	u32				report_interval;  }; +#define NFS4_FF_MIRROR_STAT_AVAIL	(0) +  struct nfs4_ff_layout_segment {  	struct pnfs_layout_segment	generic_hdr;  	u64				stripe_unit; @@ -103,6 +108,14 @@ struct nfs4_flexfile_layout {  	ktime_t			last_report_time; /* Layoutstat report times */  }; +struct nfs4_flexfile_layoutreturn_args { +	struct list_head errors; +	struct nfs42_layoutstat_devinfo devinfo[FF_LAYOUTSTATS_MAXDEV]; +	unsigned int num_errors; +	unsigned int num_dev; +	struct page *pages[1]; +}; +  static inline struct nfs4_flexfile_layout *  FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)  { @@ -180,9 +193,12 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,  			     struct nfs4_ff_layout_mirror *mirror, u64 offset,  			     u64 length, int status, enum nfs_opnum4 opnum,  			     gfp_t gfp_flags); -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, -			      struct xdr_stream *xdr, int *count, -			      const struct pnfs_layout_range *range); +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); +void ff_layout_free_ds_ioerr(struct list_head *head); +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, +		const struct pnfs_layout_range *range, +		struct list_head *head, +		unsigned int maxnum);  struct nfs_fh *  nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); @@ -197,7 +213,6 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,  				 struct inode *inode);  struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,  				       u32 ds_idx, struct rpc_cred *mdscred); -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);  bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);  bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index f7a3f6b05369..e5a6f248697b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -20,9 +20,11 @@  static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;  static unsigned int dataserver_retrans; +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); +  void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)  { -	if (mirror_ds) +	if (!IS_ERR_OR_NULL(mirror_ds))  		nfs4_put_deviceid_node(&mirror_ds->id_node);  } @@ -175,19 +177,36 @@ out_err:  static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,  		struct nfs4_deviceid_node *devid)  { -	nfs4_mark_deviceid_unavailable(devid); +	nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);  	if (!ff_layout_has_available_ds(lseg))  		pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,  				lseg);  }  static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, -		struct nfs4_ff_layout_mirror *mirror) +				   struct nfs4_ff_layout_mirror *mirror, +				   bool create)  { -	if (mirror == NULL || mirror->mirror_ds == NULL) { -		pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, -					lseg); -		return false; +	if (mirror == NULL || IS_ERR(mirror->mirror_ds)) +		goto outerr; +	if (mirror->mirror_ds == NULL) { +		if (create) { +			struct nfs4_deviceid_node *node; +			struct pnfs_layout_hdr *lh = lseg->pls_layout; +			struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); + +			node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), +					&mirror->devid, lh->plh_lc_cred, +					GFP_KERNEL); +			if (node) +				mirror_ds = FF_LAYOUT_MIRROR_DS(node); + +			/* check for race with another call to this function */ +			if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && +			    mirror_ds != ERR_PTR(-ENODEV)) +				nfs4_put_deviceid_node(node); +		} else +			goto outerr;  	}  	if (mirror->mirror_ds->ds == NULL) {  		struct nfs4_deviceid_node *devid; @@ -196,15 +215,9 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,  		return false;  	}  	return true; -} - -static u64 -end_offset(u64 start, u64 len) -{ -	u64 end; - -	end = start + len; -	return end >= start ? end : NFS4_MAX_UINT64; +outerr: +	pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); +	return false;  }  static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, @@ -212,8 +225,8 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,  {  	u64 end; -	end = max_t(u64, end_offset(err->offset, err->length), -		    end_offset(offset, length)); +	end = max_t(u64, pnfs_end_offset(err->offset, err->length), +		    pnfs_end_offset(offset, length));  	err->offset = min_t(u64, err->offset, offset);  	err->length = end - err->offset;  } @@ -235,9 +248,9 @@ ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,  	ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));  	if (ret != 0)  		return ret; -	if (end_offset(e1->offset, e1->length) < e2->offset) +	if (pnfs_end_offset(e1->offset, e1->length) < e2->offset)  		return -1; -	if (e1->offset > end_offset(e2->offset, e2->length)) +	if (e1->offset > pnfs_end_offset(e2->offset, e2->length))  		return 1;  	/* If ranges overlap or are contiguous, they are the same */  	return 0; @@ -263,8 +276,9 @@ ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,  		}  		/* Entries match, so merge "err" into "dserr" */  		extend_ds_error(dserr, err->offset, err->length); -		list_del(&err->list); +		list_replace(&err->list, &dserr->list);  		kfree(err); +		return;  	}  	list_add_tail(&dserr->list, head); @@ -331,7 +345,7 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)  	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);  	struct nfs_fh *fh = NULL; -	if (!ff_layout_mirror_valid(lseg, mirror)) { +	if (!ff_layout_mirror_valid(lseg, mirror, false)) {  		pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",  			__func__, mirror_idx);  		goto out; @@ -371,7 +385,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,  	struct nfs_server *s = NFS_SERVER(ino);  	unsigned int max_payload; -	if (!ff_layout_mirror_valid(lseg, mirror)) { +	if (!ff_layout_mirror_valid(lseg, mirror, true)) {  		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",  			__func__, ds_idx);  		goto out; @@ -393,8 +407,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,  	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,  			     dataserver_retrans,  			     mirror->mirror_ds->ds_versions[0].version, -			     mirror->mirror_ds->ds_versions[0].minor_version, -			     RPC_AUTH_UNIX); +			     mirror->mirror_ds->ds_versions[0].minor_version);  	/* connect success, check rsize/wsize limit */  	if (ds->ds_clp) { @@ -457,28 +470,26 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx,  	}  } -static bool is_range_intersecting(u64 offset1, u64 length1, -				  u64 offset2, u64 length2) +void ff_layout_free_ds_ioerr(struct list_head *head)  { -	u64 end1 = end_offset(offset1, length1); -	u64 end2 = end_offset(offset2, length2); +	struct nfs4_ff_layout_ds_err *err; -	return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && -	       (end2 == NFS4_MAX_UINT64 || end2 > offset1); +	while (!list_empty(head)) { +		err = list_first_entry(head, +				struct nfs4_ff_layout_ds_err, +				list); +		list_del(&err->list); +		kfree(err); +	}  }  /* called with inode i_lock held */ -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, -			      struct xdr_stream *xdr, int *count, -			      const struct pnfs_layout_range *range) +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head)  { -	struct nfs4_ff_layout_ds_err *err, *n; +	struct nfs4_ff_layout_ds_err *err;  	__be32 *p; -	list_for_each_entry_safe(err, n, &flo->error_list, list) { -		if (!is_range_intersecting(err->offset, err->length, -					   range->offset, range->length)) -			continue; +	list_for_each_entry(err, head, list) {  		/* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)  		 * + array length + deviceid(NFS4_DEVICEID4_SIZE)  		 * + status(4) + opnum(4) @@ -497,17 +508,59 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo,  					    NFS4_DEVICEID4_SIZE);  		*p++ = cpu_to_be32(err->status);  		*p++ = cpu_to_be32(err->opnum); -		*count += 1; -		list_del(&err->list); -		dprintk("%s: offset %llu length %llu status %d op %d count %d\n", +		dprintk("%s: offset %llu length %llu status %d op %d\n",  			__func__, err->offset, err->length, err->status, -			err->opnum, *count); -		kfree(err); +			err->opnum);  	}  	return 0;  } +static +unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, +				      const struct pnfs_layout_range *range, +				      struct list_head *head, +				      unsigned int maxnum) +{ +	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); +	struct inode *inode = lo->plh_inode; +	struct nfs4_ff_layout_ds_err *err, *n; +	unsigned int ret = 0; + +	spin_lock(&inode->i_lock); +	list_for_each_entry_safe(err, n, &flo->error_list, list) { +		if (!pnfs_is_range_intersecting(err->offset, +				pnfs_end_offset(err->offset, err->length), +				range->offset, +				pnfs_end_offset(range->offset, range->length))) +			continue; +		if (!maxnum) +			break; +		list_move(&err->list, head); +		maxnum--; +		ret++; +	} +	spin_unlock(&inode->i_lock); +	return ret; +} + +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, +				      const struct pnfs_layout_range *range, +				      struct list_head *head, +				      unsigned int maxnum) +{ +	unsigned int ret; + +	ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); +	/* If we're over the max, discard all remaining entries */ +	if (ret == maxnum) { +		LIST_HEAD(discard); +		do_layout_fetch_ds_ioerr(lo, range, &discard, -1); +		ff_layout_free_ds_ioerr(&discard); +	} +	return ret; +} +  static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)  {  	struct nfs4_ff_layout_mirror *mirror; @@ -516,7 +569,11 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)  	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {  		mirror = FF_LAYOUT_COMP(lseg, idx); -		if (mirror && mirror->mirror_ds) { +		if (mirror) { +			if (!mirror->mirror_ds) +				return true; +			if (IS_ERR(mirror->mirror_ds)) +				continue;  			devid = &mirror->mirror_ds->id_node;  			if (!ff_layout_test_devid_unavailable(devid))  				return true; @@ -534,8 +591,10 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)  	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {  		mirror = FF_LAYOUT_COMP(lseg, idx); -		if (!mirror || !mirror->mirror_ds) +		if (!mirror || IS_ERR(mirror->mirror_ds))  			return false; +		if (!mirror->mirror_ds) +			continue;  		devid = &mirror->mirror_ds->id_node;  		if (ff_layout_test_devid_unavailable(devid))  			return false; @@ -544,7 +603,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)  	return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;  } -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)  {  	if (lseg->pls_range.iomode == IOMODE_READ)  		return  ff_read_layout_has_available_ds(lseg); |