diff options
Diffstat (limited to 'fs/nfs')
32 files changed, 957 insertions, 602 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8664417955a2..6abdda209642 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o  CFLAGS_nfstrace.o += -I$(src)  nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \ -			   direct.o pagelist.o read.o symlink.o unlink.o \ +			   io.o direct.o pagelist.o read.o symlink.o unlink.o \  			   write.o namespace.o mount_clnt.o nfstrace.o  nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o  nfs-$(CONFIG_SYSCTL)	+= sysctl.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 17a42e4eb872..f55a4e756047 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -102,14 +102,15 @@ static inline void put_parallel(struct parallel_io *p)  }  static struct bio * -bl_submit_bio(int rw, struct bio *bio) +bl_submit_bio(struct bio *bio)  {  	if (bio) {  		get_parallel(bio->bi_private);  		dprintk("%s submitting %s bio %u@%llu\n", __func__, -			rw == READ ? "read" : "write", bio->bi_iter.bi_size, +			bio_op(bio) == READ ? 
"read" : "write", +			bio->bi_iter.bi_size,  			(unsigned long long)bio->bi_iter.bi_sector); -		submit_bio(rw, bio); +		submit_bio(bio);  	}  	return NULL;  } @@ -158,7 +159,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,  	if (disk_addr < map->start || disk_addr >= map->start + map->len) {  		if (!dev->map(dev, disk_addr, map))  			return ERR_PTR(-EIO); -		bio = bl_submit_bio(rw, bio); +		bio = bl_submit_bio(bio);  	}  	disk_addr += map->disk_offset;  	disk_addr -= map->start; @@ -174,9 +175,10 @@ retry:  				disk_addr >> SECTOR_SHIFT, end_io, par);  		if (!bio)  			return ERR_PTR(-ENOMEM); +		bio_set_op_attrs(bio, rw, 0);  	}  	if (bio_add_page(bio, page, *len, offset) < *len) { -		bio = bl_submit_bio(rw, bio); +		bio = bl_submit_bio(bio);  		goto retry;  	}  	return bio; @@ -252,7 +254,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)  	for (i = pg_index; i < header->page_array.npages; i++) {  		if (extent_length <= 0) {  			/* We've used up the previous extent */ -			bio = bl_submit_bio(READ, bio); +			bio = bl_submit_bio(bio);  			/* Get the next one */  			if (!ext_tree_lookup(bl, isect, &be, false)) { @@ -273,7 +275,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)  		}  		if (is_hole(&be)) { -			bio = bl_submit_bio(READ, bio); +			bio = bl_submit_bio(bio);  			/* Fill hole w/ zeroes w/o accessing device */  			dprintk("%s Zeroing page for hole\n", __func__);  			zero_user_segment(pages[i], pg_offset, pg_len); @@ -306,7 +308,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)  		header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;  	}  out: -	bl_submit_bio(READ, bio); +	bl_submit_bio(bio);  	blk_finish_plug(&plug);  	put_parallel(par);  	return PNFS_ATTEMPTED; @@ -398,7 +400,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)  	for (i = pg_index; i < header->page_array.npages; i++) {  		if (extent_length <= 0) {  			/* We've used up the previous extent */ -			bio = bl_submit_bio(WRITE, bio); +			
bio = bl_submit_bio(bio);  			/* Get the next one */  			if (!ext_tree_lookup(bl, isect, &be, true)) {  				header->pnfs_error = -EINVAL; @@ -427,7 +429,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)  	header->res.count = header->args.count;  out: -	bl_submit_bio(WRITE, bio); +	bl_submit_bio(bio);  	blk_finish_plug(&plug);  	put_parallel(par);  	return PNFS_ATTEMPTED; diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index e5b89675263e..a69ef4e9c24c 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -65,8 +65,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)  		if (!p)  			return -EIO;  		b->simple.nr_sigs = be32_to_cpup(p++); -		if (!b->simple.nr_sigs) { -			dprintk("no signature\n"); +		if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) { +			dprintk("Bad signature count: %d\n", b->simple.nr_sigs);  			return -EIO;  		} @@ -89,7 +89,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)  			memcpy(&b->simple.sigs[i].sig, p,  				b->simple.sigs[i].sig_len); -			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; +			b->simple.len += 8 + 4 + \ +				(XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);  		}  		break;  	case PNFS_BLOCK_VOLUME_SLICE: @@ -104,7 +105,12 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)  		p = xdr_inline_decode(xdr, 4);  		if (!p)  			return -EIO; +  		b->concat.volumes_count = be32_to_cpup(p++); +		if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) { +			dprintk("Too many volumes: %d\n", b->concat.volumes_count); +			return -EIO; +		}  		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);  		if (!p) @@ -116,8 +122,13 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)  		p = xdr_inline_decode(xdr, 8 + 4);  		if (!p)  			return -EIO; +  		p = xdr_decode_hyper(p, &b->stripe.chunk_size);  		b->stripe.volumes_count = be32_to_cpup(p++); +		if 
(b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) { +			dprintk("Too many volumes: %d\n", b->stripe.volumes_count); +			return -EIO; +		}  		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);  		if (!p) @@ -224,18 +235,20 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,  		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)  {  	struct pnfs_block_volume *v = &volumes[idx]; +	struct block_device *bdev;  	dev_t dev;  	dev = bl_resolve_deviceid(server, v, gfp_mask);  	if (!dev)  		return -EIO; -	d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); -	if (IS_ERR(d->bdev)) { +	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); +	if (IS_ERR(bdev)) {  		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", -			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); -		return PTR_ERR(d->bdev); +			MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); +		return PTR_ERR(bdev);  	} +	d->bdev = bdev;  	d->len = i_size_read(d->bdev->bd_inode); @@ -287,44 +300,71 @@ bl_validate_designator(struct pnfs_block_volume *v)  	}  } +/* + * Try to open the udev path for the WWN.  At least on Debian the udev + * by-id path will always point to the dm-multipath device if one exists. + */ +static struct block_device * +bl_open_udev_path(struct pnfs_block_volume *v) +{ +	struct block_device *bdev; +	const char *devname; + +	devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN", +				v->scsi.designator_len, v->scsi.designator); +	if (!devname) +		return ERR_PTR(-ENOMEM); + +	bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL); +	if (IS_ERR(bdev)) { +		pr_warn("pNFS: failed to open device %s (%ld)\n", +			devname, PTR_ERR(bdev)); +	} + +	kfree(devname); +	return bdev; +} + +/* + * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the + * wwn- links will only point to the first discovered SCSI device there. 
+ */ +static struct block_device * +bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v) +{ +	struct block_device *bdev; +	const char *devname; + +	devname = kasprintf(GFP_KERNEL, +			"/dev/disk/by-id/dm-uuid-mpath-%d%*phN", +			v->scsi.designator_type, +			v->scsi.designator_len, v->scsi.designator); +	if (!devname) +		return ERR_PTR(-ENOMEM); + +	bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL); +	kfree(devname); +	return bdev; +} +  static int  bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,  		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)  {  	struct pnfs_block_volume *v = &volumes[idx]; +	struct block_device *bdev;  	const struct pr_ops *ops; -	const char *devname;  	int error;  	if (!bl_validate_designator(v))  		return -EINVAL; -	switch (v->scsi.designator_len) { -	case 8: -		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN", -				v->scsi.designator); -		break; -	case 12: -		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN", -				v->scsi.designator); -		break; -	case 16: -		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN", -				v->scsi.designator); -		break; -	default: -		return -EINVAL; -	} - -	d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL); -	if (IS_ERR(d->bdev)) { -		pr_warn("pNFS: failed to open device %s (%ld)\n", -			devname, PTR_ERR(d->bdev)); -		kfree(devname); -		return PTR_ERR(d->bdev); -	} - -	kfree(devname); +	bdev = bl_open_dm_mpath_udev_path(v); +	if (IS_ERR(bdev)) +		bdev = bl_open_udev_path(v); +	if (IS_ERR(bdev)) +		return PTR_ERR(bdev); +	d->bdev = bdev;  	d->len = i_size_read(d->bdev->bd_inode);  	d->map = bl_map_simple; @@ -352,7 +392,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,  	return 0;  out_blkdev_put: -	blkdev_put(d->bdev, FMODE_READ); +	blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);  	return error;  } diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 
720b3ff55fa9..992bcb19c11e 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)  	return be;  } +static void __ext_put_deviceids(struct list_head *head) +{ +	struct pnfs_block_extent *be, *tmp; + +	list_for_each_entry_safe(be, tmp, head, be_list) { +		nfs4_put_deviceid_node(be->be_device); +		kfree(be); +	} +} +  static void  __ext_tree_insert(struct rb_root *root,  		struct pnfs_block_extent *new, bool merge_ok) @@ -163,7 +173,8 @@ free_new:  }  static int -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end) +__ext_tree_remove(struct rb_root *root, +		sector_t start, sector_t end, struct list_head *tmp)  {  	struct pnfs_block_extent *be;  	sector_t len1 = 0, len2 = 0; @@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)  			struct pnfs_block_extent *next = ext_tree_next(be);  			rb_erase(&be->be_node, root); -			nfs4_put_deviceid_node(be->be_device); -			kfree(be); +			list_add_tail(&be->be_list, tmp);  			be = next;  		} @@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,  		sector_t start, sector_t end)  {  	int err, err2; +	LIST_HEAD(tmp);  	spin_lock(&bl->bl_ext_lock); -	err = __ext_tree_remove(&bl->bl_ext_ro, start, end); +	err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);  	if (rw) { -		err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end); +		err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);  		if (!err)  			err = err2;  	}  	spin_unlock(&bl->bl_ext_lock); +	__ext_put_deviceids(&tmp);  	return err;  } @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,  	sector_t end = start + len;  	struct pnfs_block_extent *be;  	int err = 0; +	LIST_HEAD(tmp);  	spin_lock(&bl->bl_ext_lock);  	/*  	 * First remove all COW extents or holes from written to range.  	 
*/ -	err = __ext_tree_remove(&bl->bl_ext_ro, start, end); +	err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);  	if (err)  		goto out; @@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,  	}  out:  	spin_unlock(&bl->bl_ext_lock); + +	__ext_put_deviceids(&tmp);  	return err;  } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index aaa2e8d3df6f..c92a75e066a6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -119,27 +119,30 @@ out:   * hashed by filehandle.   */  static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, -		struct nfs_fh *fh, nfs4_stateid *stateid) +		struct nfs_fh *fh)  {  	struct nfs_server *server; +	struct nfs_inode *nfsi;  	struct inode *ino;  	struct pnfs_layout_hdr *lo; +restart:  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {  		list_for_each_entry(lo, &server->layouts, plh_layouts) { -			if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid)) +			nfsi = NFS_I(lo->plh_inode); +			if (nfs_compare_fh(fh, &nfsi->fh))  				continue; -			if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) +			if (nfsi->layout != lo)  				continue;  			ino = igrab(lo->plh_inode);  			if (!ino)  				break;  			spin_lock(&ino->i_lock);  			/* Is this layout in the process of being freed? 
*/ -			if (NFS_I(ino)->layout != lo) { +			if (nfsi->layout != lo) {  				spin_unlock(&ino->i_lock);  				iput(ino); -				break; +				goto restart;  			}  			pnfs_get_layout_hdr(lo);  			spin_unlock(&ino->i_lock); @@ -151,13 +154,13 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,  }  static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, -		struct nfs_fh *fh, nfs4_stateid *stateid) +		struct nfs_fh *fh)  {  	struct pnfs_layout_hdr *lo;  	spin_lock(&clp->cl_lock);  	rcu_read_lock(); -	lo = get_layout_by_fh_locked(clp, fh, stateid); +	lo = get_layout_by_fh_locked(clp, fh);  	rcu_read_unlock();  	spin_unlock(&clp->cl_lock); @@ -167,17 +170,39 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,  /*   * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)   */ -static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, +static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,  					const nfs4_stateid *new)  {  	u32 oldseq, newseq; -	oldseq = be32_to_cpu(lo->plh_stateid.seqid); +	/* Is the stateid still not initialised? */ +	if (!pnfs_layout_is_valid(lo)) +		return NFS4ERR_DELAY; + +	/* Mismatched stateid? */ +	if (!nfs4_stateid_match_other(&lo->plh_stateid, new)) +		return NFS4ERR_BAD_STATEID; +  	newseq = be32_to_cpu(new->seqid); +	/* Are we already in a layout recall situation? */ +	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && +	    lo->plh_return_seq != 0) { +		if (newseq < lo->plh_return_seq) +			return NFS4ERR_OLD_STATEID; +		if (newseq > lo->plh_return_seq) +			return NFS4ERR_DELAY; +		goto out; +	} +	/* Check that the stateid matches what we think it should be. */ +	oldseq = be32_to_cpu(lo->plh_stateid.seqid);  	if (newseq > oldseq + 1) -		return false; -	return true; +		return NFS4ERR_DELAY; +	/* Crazy server! 
*/ +	if (newseq <= oldseq) +		return NFS4ERR_OLD_STATEID; +out: +	return NFS_OK;  }  static u32 initiate_file_draining(struct nfs_client *clp, @@ -188,7 +213,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,  	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;  	LIST_HEAD(free_me_list); -	lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); +	lo = get_layout_by_fh(clp, &args->cbl_fh);  	if (!lo) {  		trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,  				&args->cbl_stateid, -rv); @@ -196,18 +221,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,  	}  	ino = lo->plh_inode; +	pnfs_layoutcommit_inode(ino, false); +  	spin_lock(&ino->i_lock); -	if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { -		rv = NFS4ERR_DELAY; +	rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); +	if (rv != NFS_OK)  		goto unlock; -	}  	pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); -	spin_unlock(&ino->i_lock); - -	pnfs_layoutcommit_inode(ino, false); -	spin_lock(&ino->i_lock);  	/*  	 * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)  	 */ @@ -223,11 +245,13 @@ static u32 initiate_file_draining(struct nfs_client *clp,  		goto unlock;  	} +	/* Embrace your forgetfulness! 
*/ +	rv = NFS4ERR_NOMATCHING_LAYOUT; +  	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {  		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,  			&args->cbl_range);  	} -	pnfs_mark_layout_returned_if_empty(lo);  unlock:  	spin_unlock(&ino->i_lock);  	pnfs_free_lseg_list(&free_me_list); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d81f96aacd51..656f68f7fe53 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -925,7 +925,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r  	if (hdr_arg.minorversion == 0) {  		cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);  		if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) -			return rpc_drop_reply; +			goto out_invalidcred;  	}  	cps.minorversion = hdr_arg.minorversion; @@ -953,6 +953,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r  	nfs_put_client(cps.clp);  	dprintk("%s: done, status = %u\n", __func__, ntohl(status));  	return rpc_success; + +out_invalidcred: +	pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); +	return rpc_autherr_badcred;  }  /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0c96528db94a..003ebce4bbc4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -367,8 +367,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,   */  struct nfs_client *  nfs_get_client(const struct nfs_client_initdata *cl_init, -	       const struct rpc_timeout *timeparms, -	       const char *ip_addr,  	       rpc_authflavor_t authflavour)  {  	struct nfs_client *clp, *new = NULL; @@ -399,7 +397,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,  					&nn->nfs_client_list);  			spin_unlock(&nn->nfs_client_lock);  			new->cl_flags = cl_init->init_flags; -			return rpc_ops->init_client(new, timeparms, ip_addr); +			return rpc_ops->init_client(new, cl_init);  		}  		spin_unlock(&nn->nfs_client_lock); @@ -470,7 +468,7 @@ 
EXPORT_SYMBOL_GPL(nfs_init_timeout_values);   * Create an RPC client handle   */  int nfs_create_rpc_client(struct nfs_client *clp, -			  const struct rpc_timeout *timeparms, +			  const struct nfs_client_initdata *cl_init,  			  rpc_authflavor_t flavor)  {  	struct rpc_clnt		*clnt = NULL; @@ -479,8 +477,9 @@ int nfs_create_rpc_client(struct nfs_client *clp,  		.protocol	= clp->cl_proto,  		.address	= (struct sockaddr *)&clp->cl_addr,  		.addrsize	= clp->cl_addrlen, -		.timeout	= timeparms, +		.timeout	= cl_init->timeparms,  		.servername	= clp->cl_hostname, +		.nodename	= cl_init->nodename,  		.program	= &nfs_program,  		.version	= clp->rpc_ops->version,  		.authflavor	= flavor, @@ -591,14 +590,12 @@ EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);   * nfs_init_client - Initialise an NFS2 or NFS3 client   *   * @clp: nfs_client to initialise - * @timeparms: timeout parameters for underlying RPC transport - * @ip_addr: IP presentation address (not used) + * @cl_init: Initialisation parameters   *   * Returns pointer to an NFS client, or an ERR_PTR value.   
*/  struct nfs_client *nfs_init_client(struct nfs_client *clp, -		    const struct rpc_timeout *timeparms, -		    const char *ip_addr) +				   const struct nfs_client_initdata *cl_init)  {  	int error; @@ -612,7 +609,7 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,  	 * Create a client RPC handle for doing FSSTAT with UNIX auth only  	 * - RFC 2623, sec 2.3.2  	 */ -	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); +	error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);  	if (error < 0)  		goto error;  	nfs_mark_client_ready(clp, NFS_CS_READY); @@ -633,6 +630,7 @@ static int nfs_init_server(struct nfs_server *server,  			   const struct nfs_parsed_mount_data *data,  			   struct nfs_subversion *nfs_mod)  { +	struct rpc_timeout timeparms;  	struct nfs_client_initdata cl_init = {  		.hostname = data->nfs_server.hostname,  		.addr = (const struct sockaddr *)&data->nfs_server.address, @@ -640,8 +638,8 @@ static int nfs_init_server(struct nfs_server *server,  		.nfs_mod = nfs_mod,  		.proto = data->nfs_server.protocol,  		.net = data->net, +		.timeparms = &timeparms,  	}; -	struct rpc_timeout timeparms;  	struct nfs_client *clp;  	int error; @@ -653,7 +651,7 @@ static int nfs_init_server(struct nfs_server *server,  		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);  	/* Allocate or find a client reference we can use */ -	clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); +	clp = nfs_get_client(&cl_init, RPC_AUTH_UNIX);  	if (IS_ERR(clp)) {  		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));  		return PTR_ERR(clp); @@ -1102,7 +1100,6 @@ static const struct file_operations nfs_server_list_fops = {  	.read		= seq_read,  	.llseek		= seq_lseek,  	.release	= seq_release_net, -	.owner		= THIS_MODULE,  };  static int nfs_volume_list_open(struct inode *inode, struct file *file); @@ -1123,7 +1120,6 @@ static const struct file_operations nfs_volume_list_fops = {  	.read		= seq_read,  	.llseek		= seq_lseek,  	.release	= 
seq_release_net, -	.owner		= THIS_MODULE,  };  /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aaf7bd0cbae2..177fefb26c18 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -232,7 +232,7 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le  	 * in a page cache page which kmemleak does not scan.  	 */  	kmemleak_not_leak(string->name); -	string->hash = full_name_hash(name, len); +	string->hash = full_name_hash(NULL, name, len);  	return 0;  } @@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,  static  int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)  { +	struct inode *inode;  	struct nfs_inode *nfsi;  	if (d_really_is_negative(dentry))  		return 0; -	nfsi = NFS_I(d_inode(dentry)); +	inode = d_inode(dentry); +	if (is_bad_inode(inode) || NFS_STALE(inode)) +		return 0; + +	nfsi = NFS_I(inode);  	if (entry->fattr->fileid == nfsi->fileid)  		return 1;  	if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) @@ -497,7 +502,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)  		if (filename.len == 2 && filename.name[1] == '.')  			return;  	} -	filename.hash = full_name_hash(filename.name, filename.len); +	filename.hash = full_name_hash(parent, filename.name, filename.len);  	dentry = d_lookup(parent, &filename);  again: @@ -729,7 +734,7 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)  	struct page *page;  	for (;;) { -		page = read_cache_page(file_inode(desc->file)->i_mapping, +		page = read_cache_page(desc->file->f_mapping,  			desc->page_index, (filler_t *)nfs_readdir_filler, desc);  		if (IS_ERR(page) || grab_page(page))  			break; @@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations);  struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)  {  	struct dentry *res; -	struct dentry *parent;  	struct inode *inode = NULL;  	struct nfs_fh *fhandle = NULL;  	struct nfs_fattr *fattr = NULL; @@ -1393,20 +1397,18 @@ struct dentry 
*nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in  	if (IS_ERR(label))  		goto out; -	parent = dentry->d_parent; -	/* Protect against concurrent sillydeletes */  	trace_nfs_lookup_enter(dir, dentry, flags);  	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);  	if (error == -ENOENT)  		goto no_entry;  	if (error < 0) {  		res = ERR_PTR(error); -		goto out_unblock_sillyrename; +		goto out_label;  	}  	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);  	res = ERR_CAST(inode);  	if (IS_ERR(res)) -		goto out_unblock_sillyrename; +		goto out_label;  	/* Success: notify readdir to use READDIRPLUS */  	nfs_advise_use_readdirplus(dir); @@ -1415,11 +1417,11 @@ no_entry:  	res = d_splice_alias(inode, dentry);  	if (res != NULL) {  		if (IS_ERR(res)) -			goto out_unblock_sillyrename; +			goto out_label;  		dentry = res;  	}  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); -out_unblock_sillyrename: +out_label:  	trace_nfs_lookup_exit(dir, dentry, flags, error);  	nfs4_label_free(label);  out: @@ -1482,11 +1484,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,  		    struct file *file, unsigned open_flags,  		    umode_t mode, int *opened)  { +	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);  	struct nfs_open_context *ctx;  	struct dentry *res;  	struct iattr attr = { .ia_valid = ATTR_OPEN };  	struct inode *inode;  	unsigned int lookup_flags = 0; +	bool switched = false;  	int err;  	/* Expect a negative dentry */ @@ -1501,7 +1505,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,  	/* NFS only supports OPEN on regular files */  	if ((open_flags & O_DIRECTORY)) { -		if (!d_unhashed(dentry)) { +		if (!d_in_lookup(dentry)) {  			/*  			 * Hashed negative dentry with O_DIRECTORY: dentry was  			 * revalidated and is fine, no need to perform lookup @@ -1525,6 +1529,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,  		attr.ia_size = 0;  	} +	if (!(open_flags & O_CREAT) && 
!d_in_lookup(dentry)) { +		d_drop(dentry); +		switched = true; +		dentry = d_alloc_parallel(dentry->d_parent, +					  &dentry->d_name, &wq); +		if (IS_ERR(dentry)) +			return PTR_ERR(dentry); +		if (unlikely(!d_in_lookup(dentry))) +			return finish_no_open(file, dentry); +	} +  	ctx = create_nfs_open_context(dentry, open_flags);  	err = PTR_ERR(ctx);  	if (IS_ERR(ctx)) @@ -1536,9 +1551,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,  		err = PTR_ERR(inode);  		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);  		put_nfs_open_context(ctx); +		d_drop(dentry);  		switch (err) {  		case -ENOENT: -			d_drop(dentry);  			d_add(dentry, NULL);  			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));  			break; @@ -1560,14 +1575,23 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,  	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);  	put_nfs_open_context(ctx);  out: +	if (unlikely(switched)) { +		d_lookup_done(dentry); +		dput(dentry); +	}  	return err;  no_open:  	res = nfs_lookup(dir, dentry, lookup_flags); -	err = PTR_ERR(res); +	if (switched) { +		d_lookup_done(dentry); +		if (!res) +			res = dentry; +		else +			dput(dentry); +	}  	if (IS_ERR(res)) -		goto out; - +		return PTR_ERR(res);  	return finish_no_open(file, res);  }  EXPORT_SYMBOL_GPL(nfs_atomic_open); @@ -2228,21 +2252,37 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st  	return NULL;  } -static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)  {  	struct nfs_inode *nfsi = NFS_I(inode);  	struct nfs_access_entry *cache; -	int err = -ENOENT; +	bool retry = true; +	int err;  	spin_lock(&inode->i_lock); -	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) -		goto out_zap; -	cache = nfs_access_search_rbtree(inode, cred); -	if (cache == NULL) -		goto out; -	if 
(!nfs_have_delegated_attributes(inode) && -	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) -		goto out_stale; +	for(;;) { +		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) +			goto out_zap; +		cache = nfs_access_search_rbtree(inode, cred); +		err = -ENOENT; +		if (cache == NULL) +			goto out; +		/* Found an entry, is our attribute cache valid? */ +		if (!nfs_attribute_cache_expired(inode) && +		    !(nfsi->cache_validity & NFS_INO_INVALID_ATTR)) +			break; +		err = -ECHILD; +		if (!may_block) +			goto out; +		if (!retry) +			goto out_zap; +		spin_unlock(&inode->i_lock); +		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); +		if (err) +			return err; +		spin_lock(&inode->i_lock); +		retry = false; +	}  	res->jiffies = cache->jiffies;  	res->cred = cache->cred;  	res->mask = cache->mask; @@ -2251,12 +2291,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str  out:  	spin_unlock(&inode->i_lock);  	return err; -out_stale: -	rb_erase(&cache->rb_node, &nfsi->access_cache); -	list_del(&cache->lru); -	spin_unlock(&inode->i_lock); -	nfs_access_free_entry(cache); -	return -ENOENT;  out_zap:  	spin_unlock(&inode->i_lock);  	nfs_access_zap_cache(inode); @@ -2283,13 +2317,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,  		cache = NULL;  	if (cache == NULL)  		goto out; -	if (!nfs_have_delegated_attributes(inode) && -	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) +	err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode); +	if (err)  		goto out;  	res->jiffies = cache->jiffies;  	res->cred = cache->cred;  	res->mask = cache->mask; -	err = 0;  out:  	rcu_read_unlock();  	return err; @@ -2378,18 +2411,19 @@ EXPORT_SYMBOL_GPL(nfs_access_set_mask);  static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)  {  	struct nfs_access_entry cache; +	bool may_block = (mask & MAY_NOT_BLOCK) == 0;  	int status;  	
trace_nfs_access_enter(inode);  	status = nfs_access_get_cached_rcu(inode, cred, &cache);  	if (status != 0) -		status = nfs_access_get_cached(inode, cred, &cache); +		status = nfs_access_get_cached(inode, cred, &cache, may_block);  	if (status == 0)  		goto out_cached;  	status = -ECHILD; -	if (mask & MAY_NOT_BLOCK) +	if (!may_block)  		goto out;  	/* Be clever: ask server to check for all possible rights */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 979b3c4dee6a..72b7d13ee3c6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -196,6 +196,12 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,  	WARN_ON_ONCE(verfp->committed < 0);  } +static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1, +		const struct nfs_writeverf *v2) +{ +	return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier); +} +  /*   * nfs_direct_cmp_hdr_verf - compare verifier for pgio header   * @dreq - direct request possibly spanning multiple servers @@ -215,7 +221,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,  		nfs_direct_set_hdr_verf(dreq, hdr);  		return 0;  	} -	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +	return nfs_direct_cmp_verf(verfp, &hdr->verf);  }  /* @@ -238,15 +244,13 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,  	if (verfp->committed < 0)  		return 1; -	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); +	return nfs_direct_cmp_verf(verfp, &data->verf);  }  /**   * nfs_direct_IO - NFS address space operation for direct I/O   * @iocb: target I/O control block - * @iov: array of vectors that define I/O buffer - * @pos: offset in file to begin the operation - * @nr_segs: size of iovec array + * @iter: I/O buffer   *   * The presence of this routine in the address space ops vector means   * the NFS client supports direct I/O. 
However, for most direct IO, we @@ -353,10 +357,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)  	result = wait_for_completion_killable(&dreq->completion); +	if (!result) { +		result = dreq->count; +		WARN_ON_ONCE(dreq->count < 0); +	}  	if (!result)  		result = dreq->error; -	if (!result) -		result = dreq->count;  out:  	return (ssize_t) result; @@ -366,28 +372,18 @@ out:   * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust   * the iocb is still valid here if this is a synchronous request.   */ -static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) +static void nfs_direct_complete(struct nfs_direct_req *dreq)  {  	struct inode *inode = dreq->inode; -	if (dreq->iocb && write) { -		loff_t pos = dreq->iocb->ki_pos + dreq->count; - -		spin_lock(&inode->i_lock); -		if (i_size_read(inode) < pos) -			i_size_write(inode, pos); -		spin_unlock(&inode->i_lock); -	} - -	if (write) -		nfs_zap_mapping(inode, inode->i_mapping); -  	inode_dio_end(inode);  	if (dreq->iocb) {  		long res = (long) dreq->error; -		if (!res) +		if (dreq->count != 0) {  			res = (long) dreq->count; +			WARN_ON_ONCE(dreq->count < 0); +		}  		dreq->iocb->ki_complete(dreq->iocb, res, 0);  	} @@ -434,7 +430,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)  	}  out_put:  	if (put_dreq(dreq)) -		nfs_direct_complete(dreq, false); +		nfs_direct_complete(dreq);  	hdr->release(hdr);  } @@ -540,7 +536,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,  	}  	if (put_dreq(dreq)) -		nfs_direct_complete(dreq, false); +		nfs_direct_complete(dreq);  	return 0;  } @@ -581,17 +577,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)  	if (!count)  		goto out; -	inode_lock(inode); -	result = nfs_sync_mapping(mapping); -	if (result) -		goto out_unlock; -  	task_io_account_read(count);  	result = -ENOMEM;  	dreq = nfs_direct_req_alloc();  	if (dreq == NULL) -		goto out_unlock; +		goto out;  	
dreq->inode = inode;  	dreq->bytes_left = dreq->max_count = count; @@ -606,10 +597,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)  	if (!is_sync_kiocb(iocb))  		dreq->iocb = iocb; +	nfs_start_io_direct(inode); +  	NFS_I(inode)->read_io += count;  	result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); -	inode_unlock(inode); +	nfs_end_io_direct(inode);  	if (!result) {  		result = nfs_direct_wait(dreq); @@ -617,13 +610,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)  			iocb->ki_pos += result;  	} -	nfs_direct_req_release(dreq); -	return result; -  out_release:  	nfs_direct_req_release(dreq); -out_unlock: -	inode_unlock(inode);  out:  	return result;  } @@ -655,6 +643,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)  	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);  	dreq->count = 0; +	dreq->verf.committed = NFS_INVALID_STABLE_HOW; +	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);  	for (i = 0; i < dreq->mirror_count; i++)  		dreq->mirrors[i].count = 0;  	get_dreq(dreq); @@ -773,7 +763,8 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)  			nfs_direct_write_reschedule(dreq);  			break;  		default: -			nfs_direct_complete(dreq, true); +			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); +			nfs_direct_complete(dreq);  	}  } @@ -989,6 +980,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,  ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)  {  	ssize_t result = -EINVAL; +	size_t count;  	struct file *file = iocb->ki_filp;  	struct address_space *mapping = file->f_mapping;  	struct inode *inode = mapping->host; @@ -999,34 +991,24 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)  	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",  		file, iov_iter_count(iter), (long long) iocb->ki_pos); -	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, -		 
     iov_iter_count(iter)); +	result = generic_write_checks(iocb, iter); +	if (result <= 0) +		return result; +	count = result; +	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);  	pos = iocb->ki_pos;  	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; -	inode_lock(inode); - -	result = nfs_sync_mapping(mapping); -	if (result) -		goto out_unlock; - -	if (mapping->nrpages) { -		result = invalidate_inode_pages2_range(mapping, -					pos >> PAGE_SHIFT, end); -		if (result) -			goto out_unlock; -	} - -	task_io_account_write(iov_iter_count(iter)); +	task_io_account_write(count);  	result = -ENOMEM;  	dreq = nfs_direct_req_alloc();  	if (!dreq) -		goto out_unlock; +		goto out;  	dreq->inode = inode; -	dreq->bytes_left = dreq->max_count = iov_iter_count(iter); +	dreq->bytes_left = dreq->max_count = count;  	dreq->io_start = pos;  	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));  	l_ctx = nfs_get_lock_context(dreq->ctx); @@ -1038,6 +1020,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)  	if (!is_sync_kiocb(iocb))  		dreq->iocb = iocb; +	nfs_start_io_direct(inode); +  	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);  	if (mapping->nrpages) { @@ -1045,30 +1029,19 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)  					      pos >> PAGE_SHIFT, end);  	} -	inode_unlock(inode); +	nfs_end_io_direct(inode);  	if (!result) {  		result = nfs_direct_wait(dreq);  		if (result > 0) { -			struct inode *inode = mapping->host; -  			iocb->ki_pos = pos + result; -			spin_lock(&inode->i_lock); -			if (i_size_read(inode) < iocb->ki_pos) -				i_size_write(inode, iocb->ki_pos); -			spin_unlock(&inode->i_lock); -  			/* XXX: should check the generic_write_sync retval */  			generic_write_sync(iocb, result);  		}  	} -	nfs_direct_req_release(dreq); -	return result; -  out_release:  	nfs_direct_req_release(dreq); -out_unlock: -	inode_unlock(inode); +out:  	return result;  } diff --git 
a/fs/nfs/file.c b/fs/nfs/file.c index 717a8d6af52d..7d620970f2e1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)  		iocb->ki_filp,  		iov_iter_count(to), (unsigned long) iocb->ki_pos); -	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); +	nfs_start_io_read(inode); +	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);  	if (!result) {  		result = generic_file_read_iter(iocb, to);  		if (result > 0)  			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);  	} +	nfs_end_io_read(inode);  	return result;  }  EXPORT_SYMBOL_GPL(nfs_file_read); @@ -191,12 +193,14 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,  	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",  		filp, (unsigned long) count, (unsigned long long) *ppos); -	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); +	nfs_start_io_read(inode); +	res = nfs_revalidate_mapping(inode, filp->f_mapping);  	if (!res) {  		res = generic_file_splice_read(filp, ppos, pipe, count, flags);  		if (res > 0)  			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);  	} +	nfs_end_io_read(inode);  	return res;  }  EXPORT_SYMBOL_GPL(nfs_file_splice_read); @@ -272,16 +276,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)  	trace_nfs_fsync_enter(inode); -	inode_dio_wait(inode);  	do {  		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);  		if (ret != 0)  			break; -		inode_lock(inode);  		ret = nfs_file_fsync_commit(file, start, end, datasync);  		if (!ret)  			ret = pnfs_sync_inode(inode, !!datasync); -		inode_unlock(inode);  		/*  		 * If nfs_file_fsync_commit detected a server reboot, then  		 * resend all dirty pages that might have been covered by @@ -359,19 +360,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,  		file, mapping->host->i_ino, len, (long long) pos);  start: -	/* -	 * Prevent starvation issues if someone is 
doing a consistency -	 * sync-to-disk -	 */ -	ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, -				 nfs_wait_bit_killable, TASK_KILLABLE); -	if (ret) -		return ret; -	/* -	 * Wait for O_DIRECT to complete -	 */ -	inode_dio_wait(mapping->host); -  	page = grab_cache_page_write_begin(mapping, index, flags);  	if (!page)  		return -ENOMEM; @@ -432,7 +420,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,  		return status;  	NFS_I(mapping->host)->write_io += copied; -	if (nfs_ctx_key_to_expire(ctx)) { +	if (nfs_ctx_key_to_expire(ctx, mapping->host)) {  		status = nfs_wb_all(mapping->host);  		if (status < 0)  			return status; @@ -470,31 +458,8 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset,   */  static int nfs_release_page(struct page *page, gfp_t gfp)  { -	struct address_space *mapping = page->mapping; -  	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); -	/* Always try to initiate a 'commit' if relevant, but only -	 * wait for it if the caller allows blocking.  Even then, -	 * only wait 1 second and only if the 'bdi' is not congested. -	 * Waiting indefinitely can cause deadlocks when the NFS -	 * server is on this machine, when a new TCP connection is -	 * needed and in other rare cases.  There is no particular -	 * need to wait extensively here.  A short wait has the -	 * benefit that someone else can worry about the freezer. 
-	 */ -	if (mapping) { -		struct nfs_server *nfss = NFS_SERVER(mapping->host); -		nfs_commit_inode(mapping->host, 0); -		if (gfpflags_allow_blocking(gfp) && -		    !bdi_write_congested(&nfss->backing_dev_info)) { -			wait_on_page_bit_killable_timeout(page, PG_private, -							  HZ); -			if (PagePrivate(page)) -				set_bdi_congested(&nfss->backing_dev_info, -						  BLK_RW_ASYNC); -		} -	}  	/* If PagePrivate() is set, then the page is not freeable */  	if (PagePrivate(page))  		return 0; @@ -604,6 +569,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)  		filp, filp->f_mapping->host->i_ino,  		(long long)page_offset(page)); +	sb_start_pagefault(inode->i_sb); +  	/* make sure the cache has finished storing the page */  	nfs_fscache_wait_on_page_write(NFS_I(inode), page); @@ -630,6 +597,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)  out_unlock:  	unlock_page(page);  out: +	sb_end_pagefault(inode->i_sb);  	return ret;  } @@ -645,7 +613,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode)  	ctx = nfs_file_open_context(filp);  	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || -	    nfs_ctx_key_to_expire(ctx)) +	    nfs_ctx_key_to_expire(ctx, inode))  		return 1;  	return 0;  } @@ -656,23 +624,17 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)  	struct inode *inode = file_inode(file);  	unsigned long written = 0;  	ssize_t result; -	size_t count = iov_iter_count(from);  	result = nfs_key_timeout_notify(file, inode);  	if (result)  		return result; -	if (iocb->ki_flags & IOCB_DIRECT) { -		result = generic_write_checks(iocb, from); -		if (result <= 0) -			return result; +	if (iocb->ki_flags & IOCB_DIRECT)  		return nfs_file_direct_write(iocb, from); -	}  	dprintk("NFS: write(%pD2, %zu@%Ld)\n", -		file, count, (long long) iocb->ki_pos); +		file, iov_iter_count(from), (long long) iocb->ki_pos); -	result = -EBUSY;  	if (IS_SWAPFILE(inode))  		goto 
out_swapfile;  	/* @@ -684,28 +646,33 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)  			goto out;  	} -	result = count; -	if (!count) +	nfs_start_io_write(inode); +	result = generic_write_checks(iocb, from); +	if (result > 0) { +		current->backing_dev_info = inode_to_bdi(inode); +		result = generic_perform_write(file, from, iocb->ki_pos); +		current->backing_dev_info = NULL; +	} +	nfs_end_io_write(inode); +	if (result <= 0)  		goto out; -	result = generic_file_write_iter(iocb, from); -	if (result > 0) -		written = result; +	written = generic_write_sync(iocb, result); +	iocb->ki_pos += written;  	/* Return error values */ -	if (result >= 0 && nfs_need_check_write(file, inode)) { +	if (nfs_need_check_write(file, inode)) {  		int err = vfs_fsync(file, 0);  		if (err < 0)  			result = err;  	} -	if (result > 0) -		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); +	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);  out:  	return result;  out_swapfile:  	printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); -	goto out; +	return -EBUSY;  }  EXPORT_SYMBOL_GPL(nfs_file_write); @@ -780,11 +747,6 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)  }  static int -is_time_granular(struct timespec *ts) { -	return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); -} - -static int  do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)  {  	struct inode *inode = filp->f_mapping->host; @@ -817,12 +779,8 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)  	 * This makes locking act as a cache coherency point.  	 
*/  	nfs_sync_mapping(filp->f_mapping); -	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { -		if (is_time_granular(&NFS_SERVER(inode)->time_delta)) -			__nfs_revalidate_inode(NFS_SERVER(inode), inode); -		else -			nfs_zap_caches(inode); -	} +	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) +		nfs_zap_mapping(inode, filp->f_mapping);  out:  	return status;  } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index aa59757389dc..a3fc48ba4931 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -255,13 +255,16 @@ static int filelayout_read_done_cb(struct rpc_task *task,  static void  filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)  { +	loff_t end_offs = 0;  	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || -	    hdr->res.verf->committed != NFS_DATA_SYNC) +	    hdr->res.verf->committed == NFS_FILE_SYNC)  		return; +	if (hdr->res.verf->committed == NFS_DATA_SYNC) +		end_offs = hdr->mds_offset + (loff_t)hdr->res.count; -	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, -			hdr->mds_offset + hdr->res.count); +	/* Note: if the write is unstable, don't set end_offs until commit */ +	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);  	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,  		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);  } @@ -354,6 +357,12 @@ static int filelayout_write_done_cb(struct rpc_task *task,  	}  	filelayout_set_layoutcommit(hdr); + +	/* zero out the fattr */ +	hdr->fattr.valid = 0; +	if (task->tk_status >= 0) +		nfs_writeback_update_inode(hdr); +  	return 0;  } @@ -375,8 +384,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,  		return -EAGAIN;  	} -	if (data->verf.committed == NFS_UNSTABLE) -		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); +	pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);  	return 0;  } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c 
b/fs/nfs/flexfilelayout/flexfilelayout.c index 0e8018bc9880..e6206eaf2bdf 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1325,15 +1325,16 @@ ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)   * we always send layoutcommit after DS writes.   */  static void -ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) +ff_layout_set_layoutcommit(struct inode *inode, +		struct pnfs_layout_segment *lseg, +		loff_t end_offset)  { -	if (!ff_layout_need_layoutcommit(hdr->lseg)) +	if (!ff_layout_need_layoutcommit(lseg))  		return; -	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, -			hdr->mds_offset + hdr->res.count); -	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, -		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); +	pnfs_set_layoutcommit(inode, lseg, end_offset); +	dprintk("%s inode %lu pls_end_pos %llu\n", __func__, inode->i_ino, +		(unsigned long long) NFS_I(inode)->layout->plh_lwb);  }  static bool @@ -1469,6 +1470,7 @@ static void ff_layout_read_release(void *data)  static int ff_layout_write_done_cb(struct rpc_task *task,  				struct nfs_pgio_header *hdr)  { +	loff_t end_offs = 0;  	int err;  	trace_nfs4_pnfs_write(hdr, task->tk_status); @@ -1494,7 +1496,10 @@ static int ff_layout_write_done_cb(struct rpc_task *task,  	if (hdr->res.verf->committed == NFS_FILE_SYNC ||  	    hdr->res.verf->committed == NFS_DATA_SYNC) -		ff_layout_set_layoutcommit(hdr); +		end_offs = hdr->mds_offset + (loff_t)hdr->res.count; + +	/* Note: if the write is unstable, don't set end_offs until commit */ +	ff_layout_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);  	/* zero out fattr since we don't care DS attr at all */  	hdr->fattr.valid = 0; @@ -1530,9 +1535,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,  		return -EAGAIN;  	} -	if (data->verf.committed == NFS_UNSTABLE -	    && ff_layout_need_layoutcommit(data->lseg)) -		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); +	
ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);  	return 0;  } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52e7d6869e3b..bf4ec5ecc97e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque)  	struct nfs_fattr	*fattr = desc->fattr;  	set_nfs_fileid(inode, fattr->fileid); +	inode->i_mode = fattr->mode;  	nfs_copy_fh(NFS_FH(inode), desc->fh);  	return 0;  } @@ -661,9 +662,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)  	trace_nfs_getattr_enter(inode);  	/* Flush out writes to the server in order to update c/mtime.  */  	if (S_ISREG(inode->i_mode)) { -		inode_lock(inode); -		err = nfs_sync_inode(inode); -		inode_unlock(inode); +		err = filemap_write_and_wait(inode->i_mapping);  		if (err)  			goto out;  	} @@ -878,7 +877,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)  	struct nfs_inode *nfsi = NFS_I(inode);  	spin_lock(&inode->i_lock); -	list_add(&ctx->list, &nfsi->open_files); +	if (ctx->mode & FMODE_WRITE) +		list_add(&ctx->list, &nfsi->open_files); +	else +		list_add_tail(&ctx->list, &nfsi->open_files);  	spin_unlock(&inode->i_lock);  }  EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); @@ -971,6 +973,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)  	if (NFS_STALE(inode))  		goto out; +	/* pNFS: Attributes aren't updated until we layoutcommit */ +	if (S_ISREG(inode->i_mode)) { +		status = pnfs_sync_inode(inode, false); +		if (status) +			goto out; +	} +  	status = -ENOMEM;  	fattr = nfs_alloc_fattr();  	if (fattr == NULL) @@ -1121,14 +1130,12 @@ out:  }  /** - * __nfs_revalidate_mapping - Revalidate the pagecache + * nfs_revalidate_mapping - Revalidate the pagecache   * @inode - pointer to host inode   * @mapping - pointer to mapping - * @may_lock - take inode->i_mutex?   
*/ -static int __nfs_revalidate_mapping(struct inode *inode, -		struct address_space *mapping, -		bool may_lock) +int nfs_revalidate_mapping(struct inode *inode, +		struct address_space *mapping)  {  	struct nfs_inode *nfsi = NFS_I(inode);  	unsigned long *bitlock = &nfsi->flags; @@ -1177,12 +1184,7 @@ static int __nfs_revalidate_mapping(struct inode *inode,  	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;  	spin_unlock(&inode->i_lock);  	trace_nfs_invalidate_mapping_enter(inode); -	if (may_lock) { -		inode_lock(inode); -		ret = nfs_invalidate_mapping(inode, mapping); -		inode_unlock(inode); -	} else -		ret = nfs_invalidate_mapping(inode, mapping); +	ret = nfs_invalidate_mapping(inode, mapping);  	trace_nfs_invalidate_mapping_exit(inode, ret);  	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); @@ -1192,27 +1194,28 @@ out:  	return ret;  } -/** - * nfs_revalidate_mapping - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +static bool nfs_file_has_writers(struct nfs_inode *nfsi)  { -	return __nfs_revalidate_mapping(inode, mapping, false); +	struct inode *inode = &nfsi->vfs_inode; + +	assert_spin_locked(&inode->i_lock); + +	if (!S_ISREG(inode->i_mode)) +		return false; +	if (list_empty(&nfsi->open_files)) +		return false; +	/* Note: This relies on nfsi->open_files being ordered with writers +	 *       being placed at the head of the list. +	 *       See nfs_inode_attach_open_context() +	 */ +	return (list_first_entry(&nfsi->open_files, +			struct nfs_open_context, +			list)->mode & FMODE_WRITE) == FMODE_WRITE;  } -/** - * nfs_revalidate_mapping_protected - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - * - * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex - * while invalidating the mapping. 
- */ -int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping) +static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)  { -	return __nfs_revalidate_mapping(inode, mapping, true); +	return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi);  }  static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) @@ -1279,22 +1282,24 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat  	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))  		return -EIO; -	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && -			inode->i_version != fattr->change_attr) -		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; +	if (!nfs_file_has_buffered_writers(nfsi)) { +		/* Verify a few of the more important attributes */ +		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr) +			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; -	/* Verify a few of the more important attributes */ -	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) -		invalid |= NFS_INO_INVALID_ATTR; +		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) +			invalid |= NFS_INO_INVALID_ATTR; -	if (fattr->valid & NFS_ATTR_FATTR_SIZE) { -		cur_size = i_size_read(inode); -		new_isize = nfs_size_to_loff_t(fattr->size); -		if (cur_size != new_isize) -			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; +		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime)) +			invalid |= NFS_INO_INVALID_ATTR; + +		if (fattr->valid & NFS_ATTR_FATTR_SIZE) { +			cur_size = i_size_read(inode); +			new_isize = nfs_size_to_loff_t(fattr->size); +			if (cur_size != new_isize) +				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; +		}  	} -	if (nfsi->nrequests != 0) -		invalid &= ~NFS_INO_REVAL_PAGECACHE;  	/* Have 
any file permissions changed? */  	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1469,28 +1474,12 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n  		((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);  } -/* - * Don't trust the change_attribute, mtime, ctime or size if - * a pnfs LAYOUTCOMMIT is outstanding - */ -static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode, -		struct nfs_fattr *fattr) -{ -	if (pnfs_layoutcommit_outstanding(inode)) -		fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE | -				NFS_ATTR_FATTR_MTIME | -				NFS_ATTR_FATTR_CTIME | -				NFS_ATTR_FATTR_SIZE); -} -  static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)  {  	int ret;  	trace_nfs_refresh_inode_enter(inode); -	nfs_inode_attrs_handle_layoutcommit(inode, fattr); -  	if (nfs_inode_attrs_need_update(inode, fattr))  		ret = nfs_update_inode(inode, fattr);  	else @@ -1526,7 +1515,7 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);  static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)  { -	unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; +	unsigned long invalid = NFS_INO_INVALID_ATTR;  	/*  	 * Don't revalidate the pagecache if we hold a delegation, but do @@ -1675,6 +1664,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  	unsigned long invalid = 0;  	unsigned long now = jiffies;  	unsigned long save_cache_validity; +	bool have_writers = nfs_file_has_buffered_writers(nfsi);  	bool cache_revalidated = true;  	dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", @@ -1724,17 +1714,25 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  	/* Do atomic weak cache consistency updates */  	invalid |= nfs_wcc_update_inode(inode, fattr); +	if (pnfs_layoutcommit_outstanding(inode)) { +		nfsi->cache_validity |= save_cache_validity & 
NFS_INO_INVALID_ATTR; +		cache_revalidated = false; +	} +  	/* More cache consistency checks */  	if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {  		if (inode->i_version != fattr->change_attr) {  			dprintk("NFS: change_attr change on server for file %s/%ld\n",  					inode->i_sb->s_id, inode->i_ino); -			invalid |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_DATA -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL; -			if (S_ISDIR(inode->i_mode)) -				nfs_force_lookup_revalidate(inode); +			/* Could it be a race with writeback? */ +			if (!have_writers) { +				invalid |= NFS_INO_INVALID_ATTR +					| NFS_INO_INVALID_DATA +					| NFS_INO_INVALID_ACCESS +					| NFS_INO_INVALID_ACL; +				if (S_ISDIR(inode->i_mode)) +					nfs_force_lookup_revalidate(inode); +			}  			inode->i_version = fattr->change_attr;  		}  	} else { @@ -1767,9 +1765,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  		if (new_isize != cur_isize) {  			/* Do we perhaps have any outstanding writes, or has  			 * the file grown beyond our last write? 
*/ -			if ((nfsi->nrequests == 0) || new_isize > cur_isize) { +			if (nfsi->nrequests == 0 || new_isize > cur_isize) {  				i_size_write(inode, new_isize); -				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; +				if (!have_writers) +					invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;  			}  			dprintk("NFS: isize change on server for file %s/%ld "  					"(%Ld to %Ld)\n", diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5154fa65a2f2..7ce5e023c3c3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -66,13 +66,16 @@ struct nfs_clone_mount {  struct nfs_client_initdata {  	unsigned long init_flags; -	const char *hostname; -	const struct sockaddr *addr; +	const char *hostname;			/* Hostname of the server */ +	const struct sockaddr *addr;		/* Address of the server */ +	const char *nodename;			/* Hostname of the client */ +	const char *ip_addr;			/* IP address of the client */  	size_t addrlen;  	struct nfs_subversion *nfs_mod;  	int proto;  	u32 minorversion;  	struct net *net; +	const struct rpc_timeout *timeparms;  };  /* @@ -147,9 +150,8 @@ extern void nfs_umount(const struct nfs_mount_request *info);  extern const struct rpc_program nfs_program;  extern void nfs_clients_init(struct net *net);  extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); -int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); +int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t);  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, -				  const struct rpc_timeout *, const char *,  				  rpc_authflavor_t);  int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);  void nfs_server_insert_lists(struct nfs_server *); @@ -184,7 +186,7 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,  					   rpc_authflavor_t);  extern int nfs_wait_client_init_complete(const struct nfs_client *clp);  extern void 
nfs_mark_client_ready(struct nfs_client *clp, int state); -extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, +extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,  					     const struct sockaddr *ds_addr,  					     int ds_addrlen, int ds_proto,  					     unsigned int ds_timeo, @@ -193,7 +195,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,  					     rpc_authflavor_t au_flavor);  extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,  						struct inode *); -extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, +extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,  			const struct sockaddr *ds_addr, int ds_addrlen,  			int ds_proto, unsigned int ds_timeo,  			unsigned int ds_retrans, rpc_authflavor_t au_flavor); @@ -338,8 +340,7 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src)  /* proc.c */  void nfs_close_context(struct nfs_open_context *ctx, int is_sync);  extern struct nfs_client *nfs_init_client(struct nfs_client *clp, -			   const struct rpc_timeout *timeparms, -			   const char *ip_addr); +			   const struct nfs_client_initdata *);  /* dir.c */  extern void nfs_force_use_readdirplus(struct inode *dir); @@ -411,6 +412,19 @@ extern void __exit unregister_nfs_fs(void);  extern bool nfs_sb_active(struct super_block *sb);  extern void nfs_sb_deactive(struct super_block *sb); +/* io.c */ +extern void nfs_start_io_read(struct inode *inode); +extern void nfs_end_io_read(struct inode *inode); +extern void nfs_start_io_write(struct inode *inode); +extern void nfs_end_io_write(struct inode *inode); +extern void nfs_start_io_direct(struct inode *inode); +extern void nfs_end_io_direct(struct inode *inode); + +static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) +{ +	return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; +} +  /* namespace.c */  #define NFS_PATH_CANONICAL 1  extern char *nfs_path(char **p, 
struct dentry *dentry, @@ -496,9 +510,29 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo,  		    struct inode *inode,  		    struct nfs_direct_req *dreq);  int nfs_key_timeout_notify(struct file *filp, struct inode *inode); -bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode);  void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); +int nfs_filemap_write_and_wait_range(struct address_space *mapping, +		loff_t lstart, loff_t lend); + +#ifdef CONFIG_NFS_V4_1 +static inline +void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) +{ +	int i; + +	for (i = 0; i < cinfo->nbuckets; i++) +		cinfo->buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; +} +#else +static inline +void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) +{ +} +#endif + +  #ifdef CONFIG_MIGRATION  extern int nfs_migrate_page(struct address_space *,  		struct page *, struct page *, enum migrate_mode); @@ -506,6 +540,13 @@ extern int nfs_migrate_page(struct address_space *,  #define nfs_migrate_page NULL  #endif +static inline int +nfs_write_verifier_cmp(const struct nfs_write_verifier *v1, +		const struct nfs_write_verifier *v2) +{ +	return memcmp(v1->data, v2->data, sizeof(v1->data)); +} +  /* unlink.c */  extern struct rpc_task *  nfs_async_rename(struct inode *old_dir, struct inode *new_dir, @@ -521,8 +562,7 @@ extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);  /* nfs4proc.c */  extern void __nfs4_read_done_cb(struct nfs_pgio_header *);  extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, -			    const struct rpc_timeout *timeparms, -			    const char *ip_addr); +			    const struct nfs_client_initdata *);  extern int nfs40_walk_client_list(struct nfs_client *clp,  				struct nfs_client **result,  				struct rpc_cred *cred); @@ -623,7 +663,7 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)  
	if (!cinfo->dreq) {  		struct inode *inode = page_file_mapping(page)->host; -		inc_zone_page_state(page, NR_UNSTABLE_NFS); +		inc_node_page_state(page, NR_UNSTABLE_NFS);  		inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);  		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);  	} diff --git a/fs/nfs/io.c b/fs/nfs/io.c new file mode 100644 index 000000000000..1fc5d1ce327e --- /dev/null +++ b/fs/nfs/io.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016 Trond Myklebust + * + * I/O and data path helper functionality. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/rwsem.h> +#include <linux/fs.h> +#include <linux/nfs_fs.h> + +#include "internal.h" + +/* Call with exclusively locked inode->i_rwsem */ +static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) +{ +	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { +		clear_bit(NFS_INO_ODIRECT, &nfsi->flags); +		inode_dio_wait(inode); +	} +} + +/** + * nfs_start_io_read - declare the file is being used for buffered reads + * @inode - file inode + * + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + * On exit, the function ensures that the NFS_INO_ODIRECT flag is unset, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that buffered read operations are allowed to + * execute in parallel, thanks to the shared lock, whereas direct I/O + * operations need to wait to grab an exclusive lock in order to set + * NFS_INO_ODIRECT. + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. + */ +void +nfs_start_io_read(struct inode *inode) +{ +	struct nfs_inode *nfsi = NFS_I(inode); +	/* Be an optimist! */ +	down_read(&inode->i_rwsem); +	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) +		return; +	up_read(&inode->i_rwsem); +	/* Slow path.... 
*/ +	down_write(&inode->i_rwsem); +	nfs_block_o_direct(nfsi, inode); +	downgrade_write(&inode->i_rwsem); +} + +/** + * nfs_end_io_read - declare that the buffered read operation is done + * @inode - file inode + * + * Declare that a buffered read operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void +nfs_end_io_read(struct inode *inode) +{ +	up_read(&inode->i_rwsem); +} + +/** + * nfs_start_io_write - declare the file is being used for buffered writes + * @inode - file inode + * + * Declare that a buffered write operation is about to start, and ensure + * that we block all direct I/O. + */ +void +nfs_start_io_write(struct inode *inode) +{ +	down_write(&inode->i_rwsem); +	nfs_block_o_direct(NFS_I(inode), inode); +} + +/** + * nfs_end_io_write - declare that the buffered write operation is done + * @inode - file inode + * + * Declare that a buffered write operation is done, and release the + * lock on inode->i_rwsem. + */ +void +nfs_end_io_write(struct inode *inode) +{ +	up_write(&inode->i_rwsem); +} + +/* Call with exclusively locked inode->i_rwsem */ +static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) +{ +	if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { +		set_bit(NFS_INO_ODIRECT, &nfsi->flags); +		nfs_wb_all(inode); +	} +} + +/** + * nfs_start_io_direct - declare the file is being used for direct i/o + * @inode - file inode + * + * Declare that a direct I/O operation is about to start, and ensure + * that we block all buffered I/O. + * On exit, the function ensures that the NFS_INO_ODIRECT flag is set, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that direct I/O operations are allowed to + * execute in parallel, thanks to the shared lock, whereas buffered I/O + * operations need to wait to grab an exclusive lock in order to clear + * NFS_INO_ODIRECT. 
+ * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. + */ +void +nfs_start_io_direct(struct inode *inode) +{ +	struct nfs_inode *nfsi = NFS_I(inode); +	/* Be an optimist! */ +	down_read(&inode->i_rwsem); +	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) +		return; +	up_read(&inode->i_rwsem); +	/* Slow path.... */ +	down_write(&inode->i_rwsem); +	nfs_block_buffered(nfsi, inode); +	downgrade_write(&inode->i_rwsem); +} + +/** + * nfs_end_io_direct - declare that the direct i/o operation is done + * @inode - file inode + * + * Declare that a direct I/O operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void +nfs_end_io_direct(struct inode *inode) +{ +	up_read(&inode->i_rwsem); +} diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 9e9fa347a948..ee753547fb0a 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -76,19 +76,23 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source,   * low timeout interval so that if a connection is lost, we retry through   * the MDS.   
*/ -struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, +struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,  		const struct sockaddr *ds_addr, int ds_addrlen,  		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,  		rpc_authflavor_t au_flavor)  { +	struct rpc_timeout ds_timeout; +	struct nfs_client *mds_clp = mds_srv->nfs_client;  	struct nfs_client_initdata cl_init = {  		.addr = ds_addr,  		.addrlen = ds_addrlen, +		.nodename = mds_clp->cl_rpcclient->cl_nodename, +		.ip_addr = mds_clp->cl_ipaddr,  		.nfs_mod = &nfs_v3,  		.proto = ds_proto,  		.net = mds_clp->cl_net, +		.timeparms = &ds_timeout,  	}; -	struct rpc_timeout ds_timeout;  	struct nfs_client *clp;  	char buf[INET6_ADDRSTRLEN + 1]; @@ -97,10 +101,12 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp,  		return ERR_PTR(-EINVAL);  	cl_init.hostname = buf; +	if (mds_srv->flags & NFS_MOUNT_NORESVPORT) +		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); +  	/* Use the MDS nfs_client cl_ipaddr. 
*/  	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); -	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, -			     au_flavor); +	clp = nfs_get_client(&cl_init, au_flavor);  	return clp;  } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index aa03ed09ba06..33da841a21bb 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -113,15 +113,17 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)  	if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))  		return -EOPNOTSUPP; -	nfs_wb_all(inode);  	inode_lock(inode); +	err = nfs_sync_inode(inode); +	if (err) +		goto out_unlock;  	err = nfs42_proc_fallocate(&msg, filep, offset, len);  	if (err == 0)  		truncate_pagecache_range(inode, offset, (offset + len) -1);  	if (err == -EOPNOTSUPP)  		NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; - +out_unlock:  	inode_unlock(inode);  	return err;  } @@ -154,11 +156,20 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,  	if (status)  		return status; +	status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, +			pos_src, pos_src + (loff_t)count - 1); +	if (status) +		return status; +  	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,  				     dst_lock, FMODE_WRITE);  	if (status)  		return status; +	status = nfs_sync_inode(dst_inode); +	if (status) +		return status; +  	status = nfs4_call_sync(server->client, server, &msg,  				&args.seq_args, &res.seq_res, 0);  	if (status == -ENOTSUPP) @@ -258,7 +269,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep,  	if (status)  		return status; -	nfs_wb_all(inode); +	status = nfs_filemap_write_and_wait_range(inode->i_mapping, +			offset, LLONG_MAX); +	if (status) +		return status; +  	status = nfs4_call_sync(server->client, server, &msg,  				&args.seq_args, &res.seq_res, 0);  	if (status == -ENOTSUPP) @@ -336,8 +351,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)  			 * Mark the bad layout state as 
invalid, then retry  			 * with the current stateid.  			 */ -			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); -			pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); +			pnfs_mark_layout_stateid_invalid(lo, &head);  			spin_unlock(&inode->i_lock);  			pnfs_free_lseg_list(&head);  		} else diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6dc6f2aea0d6..8b2605882a20 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -330,13 +330,21 @@ static int decode_write_response(struct xdr_stream *xdr,  				 struct nfs42_write_res *res)  {  	__be32 *p; -	int stateids;  	p = xdr_inline_decode(xdr, 4 + 8 + 4);  	if (unlikely(!p))  		goto out_overflow; -	stateids = be32_to_cpup(p++); +	/* +	 * We never use asynchronous mode, so warn if a server returns +	 * a stateid. +	 */ +	if (unlikely(*p != 0)) { +		pr_err_once("%s: server has set unrequested " +				"asynchronous mode\n", __func__); +		return -EREMOTEIO; +	} +	p++;  	p = xdr_decode_hyper(p, &res->count);  	res->verifier.committed = be32_to_cpup(p);  	return decode_verifier(xdr, &res->verifier.verifier); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 768456fa1b17..4be567a54958 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -185,6 +185,7 @@ struct nfs4_state {  struct nfs4_exception {  	struct nfs4_state *state;  	struct inode *inode; +	nfs4_stateid *stateid;  	long timeout;  	unsigned char delay : 1,  		      recovering : 1, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 10410e8b5853..8d7d08d4f95f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -349,10 +349,10 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)   * Returns pointer to an NFS client, or an ERR_PTR value.   
*/  struct nfs_client *nfs4_init_client(struct nfs_client *clp, -				    const struct rpc_timeout *timeparms, -				    const char *ip_addr) +				    const struct nfs_client_initdata *cl_init)  {  	char buf[INET6_ADDRSTRLEN + 1]; +	const char *ip_addr = cl_init->ip_addr;  	struct nfs_client *old;  	int error; @@ -370,9 +370,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,  	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);  	__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); -	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); +	error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I);  	if (error == -EINVAL) -		error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); +		error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);  	if (error < 0)  		goto error; @@ -793,10 +793,12 @@ static int nfs4_set_client(struct nfs_server *server,  		.hostname = hostname,  		.addr = addr,  		.addrlen = addrlen, +		.ip_addr = ip_addr,  		.nfs_mod = &nfs_v4,  		.proto = proto,  		.minorversion = minorversion,  		.net = net, +		.timeparms = timeparms,  	};  	struct nfs_client *clp;  	int error; @@ -809,7 +811,7 @@ static int nfs4_set_client(struct nfs_server *server,  		set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);  	/* Allocate or find a client reference we can use */ -	clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); +	clp = nfs_get_client(&cl_init, authflavour);  	if (IS_ERR(clp)) {  		error = PTR_ERR(clp);  		goto error; @@ -842,20 +844,24 @@ error:   * low timeout interval so that if a connection is lost, we retry through   * the MDS.   
*/ -struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, +struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,  		const struct sockaddr *ds_addr, int ds_addrlen,  		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,  		u32 minor_version, rpc_authflavor_t au_flavor)  { +	struct rpc_timeout ds_timeout; +	struct nfs_client *mds_clp = mds_srv->nfs_client;  	struct nfs_client_initdata cl_init = {  		.addr = ds_addr,  		.addrlen = ds_addrlen, +		.nodename = mds_clp->cl_rpcclient->cl_nodename, +		.ip_addr = mds_clp->cl_ipaddr,  		.nfs_mod = &nfs_v4,  		.proto = ds_proto,  		.minorversion = minor_version,  		.net = mds_clp->cl_net, +		.timeparms = &ds_timeout,  	}; -	struct rpc_timeout ds_timeout;  	struct nfs_client *clp;  	char buf[INET6_ADDRSTRLEN + 1]; @@ -863,14 +869,16 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,  		return ERR_PTR(-EINVAL);  	cl_init.hostname = buf; +	if (mds_srv->flags & NFS_MOUNT_NORESVPORT) +		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); +  	/*  	 * Set an authflavor equual to the MDS value. Use the MDS nfs_client  	 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS  	 * (section 13.1 RFC 5661).  	 
*/  	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); -	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, -			     au_flavor); +	clp = nfs_get_client(&cl_init, au_flavor);  	dprintk("<-- %s %p\n", __func__, clp);  	return clp; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 014b0e41ace5..d085ad794884 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -66,7 +66,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)  	if (openflags & O_TRUNC) {  		attr.ia_valid |= ATTR_SIZE;  		attr.ia_size = 0; -		nfs_sync_inode(inode); +		filemap_write_and_wait(inode->i_mapping);  	}  	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, NULL); @@ -133,21 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,  				    struct file *file_out, loff_t pos_out,  				    size_t count, unsigned int flags)  { -	struct inode *in_inode = file_inode(file_in); -	struct inode *out_inode = file_inode(file_out); -	int ret; - -	if (in_inode == out_inode) +	if (file_inode(file_in) == file_inode(file_out))  		return -EINVAL; -	/* flush any pending writes */ -	ret = nfs_sync_inode(in_inode); -	if (ret) -		return ret; -	ret = nfs_sync_inode(out_inode); -	if (ret) -		return ret; -  	return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);  } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de97567795a5..da5c9e58e907 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -363,6 +363,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,  {  	struct nfs_client *clp = server->nfs_client;  	struct nfs4_state *state = exception->state; +	const nfs4_stateid *stateid = exception->stateid;  	struct inode *inode = exception->inode;  	int ret = errorcode; @@ -376,9 +377,18 @@ static int nfs4_do_handle_exception(struct nfs_server *server,  		case -NFS4ERR_DELEG_REVOKED:  		case -NFS4ERR_ADMIN_REVOKED:  		case -NFS4ERR_BAD_STATEID: -			if (inode && 
nfs_async_inode_return_delegation(inode, -						NULL) == 0) -				goto wait_on_recovery; +			if (inode) { +				int err; + +				err = nfs_async_inode_return_delegation(inode, +						stateid); +				if (err == 0) +					goto wait_on_recovery; +				if (stateid != NULL && stateid->type == NFS4_DELEGATION_STATEID_TYPE) { +					exception->retry = 1; +					break; +				} +			}  			if (state == NULL)  				break;  			ret = nfs4_schedule_stateid_recovery(server, state); @@ -427,6 +437,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,  		case -NFS4ERR_DELAY:  			nfs_inc_server_stats(server, NFSIOS_DELAY);  		case -NFS4ERR_GRACE: +		case -NFS4ERR_LAYOUTTRYLATER:  		case -NFS4ERR_RECALLCONFLICT:  			exception->delay = 1;  			return 0; @@ -2669,28 +2680,17 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,  	return res;  } -static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, -			    struct nfs_fattr *fattr, struct iattr *sattr, -			    struct nfs4_state *state, struct nfs4_label *ilabel, -			    struct nfs4_label *olabel) +static int _nfs4_do_setattr(struct inode *inode, +			    struct nfs_setattrargs *arg, +			    struct nfs_setattrres *res, +			    struct rpc_cred *cred, +			    struct nfs4_state *state)  {  	struct nfs_server *server = NFS_SERVER(inode); -        struct nfs_setattrargs  arg = { -                .fh             = NFS_FH(inode), -                .iap            = sattr, -		.server		= server, -		.bitmask = server->attr_bitmask, -		.label		= ilabel, -        }; -        struct nfs_setattrres  res = { -		.fattr		= fattr, -		.label		= olabel, -		.server		= server, -        };          struct rpc_message msg = {  		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETATTR], -		.rpc_argp	= &arg, -		.rpc_resp	= &res, +		.rpc_argp	= arg, +		.rpc_resp	= res,  		.rpc_cred	= cred,          };  	struct rpc_cred *delegation_cred = NULL; @@ -2699,17 +2699,13 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,  	bool 
truncate;  	int status; -	arg.bitmask = nfs4_bitmask(server, ilabel); -	if (ilabel) -		arg.bitmask = nfs4_bitmask(server, olabel); - -	nfs_fattr_init(fattr); +	nfs_fattr_init(res->fattr);  	/* Servers should only apply open mode checks for file size changes */ -	truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; +	truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false;  	fmode = truncate ? FMODE_WRITE : FMODE_READ; -	if (nfs4_copy_delegation_stateid(inode, fmode, &arg.stateid, &delegation_cred)) { +	if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) {  		/* Use that stateid */  	} else if (truncate && state != NULL) {  		struct nfs_lockowner lockowner = { @@ -2719,19 +2715,19 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,  		if (!nfs4_valid_open_stateid(state))  			return -EBADF;  		if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner, -				&arg.stateid, &delegation_cred) == -EIO) +				&arg->stateid, &delegation_cred) == -EIO)  			return -EBADF;  	} else -		nfs4_stateid_copy(&arg.stateid, &zero_stateid); +		nfs4_stateid_copy(&arg->stateid, &zero_stateid);  	if (delegation_cred)  		msg.rpc_cred = delegation_cred; -	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); +	status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1);  	put_rpccred(delegation_cred);  	if (status == 0 && state != NULL)  		renew_lease(server, timestamp); -	trace_nfs4_setattr(inode, &arg.stateid, status); +	trace_nfs4_setattr(inode, &arg->stateid, status);  	return status;  } @@ -2741,13 +2737,31 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,  			   struct nfs4_label *olabel)  {  	struct nfs_server *server = NFS_SERVER(inode); +        struct nfs_setattrargs  arg = { +                .fh             = NFS_FH(inode), +                .iap            = sattr, +		.server		= server, +		.bitmask = server->attr_bitmask, +		.label		= 
ilabel, +        }; +        struct nfs_setattrres  res = { +		.fattr		= fattr, +		.label		= olabel, +		.server		= server, +        };  	struct nfs4_exception exception = {  		.state = state,  		.inode = inode, +		.stateid = &arg.stateid,  	};  	int err; + +	arg.bitmask = nfs4_bitmask(server, ilabel); +	if (ilabel) +		arg.bitmask = nfs4_bitmask(server, olabel); +  	do { -		err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); +		err = _nfs4_do_setattr(inode, &arg, &res, cred, state);  		switch (err) {  		case -NFS4ERR_OPENMODE:  			if (!(sattr->ia_valid & ATTR_SIZE)) { @@ -2882,12 +2896,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)  			call_close |= is_wronly;  		else if (is_wronly)  			calldata->arg.fmode |= FMODE_WRITE; +		if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) +			call_close |= is_rdwr;  	} else if (is_rdwr)  		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; -	if (calldata->arg.fmode == 0) -		call_close |= is_rdwr; -  	if (!nfs4_valid_open_stateid(state))  		call_close = 0;  	spin_unlock(&state->owner->so_lock); @@ -3268,13 +3281,6 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,  	return status;  } -static int nfs4_do_find_root_sec(struct nfs_server *server, -		struct nfs_fh *fhandle, struct nfs_fsinfo *info) -{ -	int mv = server->nfs_client->cl_minorversion; -	return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info); -} -  /**   * nfs4_proc_get_rootfh - get file handle for server's pseudoroot   * @server: initialized nfs_server handle @@ -3294,7 +3300,8 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,  		status = nfs4_lookup_root(server, fhandle, info);  	if (auth_probe || status == NFS4ERR_WRONGSEC) -		status = nfs4_do_find_root_sec(server, fhandle, info); +		status = server->nfs_client->cl_mvops->find_root_sec(server, +				fhandle, info);  	if (status == 0)  		status = nfs4_server_capabilities(server, fhandle); @@ -4393,7 
+4400,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,  				 struct rpc_message *msg)  {  	hdr->timestamp   = jiffies; -	hdr->pgio_done_cb = nfs4_read_done_cb; +	if (!hdr->pgio_done_cb) +		hdr->pgio_done_cb = nfs4_read_done_cb;  	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];  	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);  } @@ -7870,11 +7878,13 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,  	struct inode *inode = lgp->args.inode;  	struct nfs_server *server = NFS_SERVER(inode);  	struct pnfs_layout_hdr *lo; -	int status = task->tk_status; +	int nfs4err = task->tk_status; +	int err, status = 0; +	LIST_HEAD(head);  	dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); -	switch (status) { +	switch (nfs4err) {  	case 0:  		goto out; @@ -7906,45 +7916,42 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,  			status = -EOVERFLOW;  			goto out;  		} -		/* Fallthrough */ +		status = -EBUSY; +		break;  	case -NFS4ERR_RECALLCONFLICT: -		nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT, -					exception);  		status = -ERECALLCONFLICT; -		goto out; +		break;  	case -NFS4ERR_EXPIRED:  	case -NFS4ERR_BAD_STATEID:  		exception->timeout = 0;  		spin_lock(&inode->i_lock); -		if (nfs4_stateid_match(&lgp->args.stateid, +		lo = NFS_I(inode)->layout; +		/* If the open stateid was bad, then recover it. */ +		if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || +		    nfs4_stateid_match_other(&lgp->args.stateid,  					&lgp->args.ctx->state->stateid)) {  			spin_unlock(&inode->i_lock); -			/* If the open stateid was bad, then recover it. */  			exception->state = lgp->args.ctx->state;  			break;  		} -		lo = NFS_I(inode)->layout; -		if (lo && nfs4_stateid_match(&lgp->args.stateid, -					&lo->plh_stateid)) { -			LIST_HEAD(head); - -			/* -			 * Mark the bad layout state as invalid, then retry -			 * with the current stateid. 
-			 */ -			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); -			pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); -			spin_unlock(&inode->i_lock); -			pnfs_free_lseg_list(&head); -		} else -			spin_unlock(&inode->i_lock); + +		/* +		 * Mark the bad layout state as invalid, then retry +		 */ +		pnfs_mark_layout_stateid_invalid(lo, &head); +		spin_unlock(&inode->i_lock); +		pnfs_free_lseg_list(&head);  		status = -EAGAIN;  		goto out;  	} -	status = nfs4_handle_exception(server, status, exception); -	if (exception->retry) -		status = -EAGAIN; +	err = nfs4_handle_exception(server, nfs4err, exception); +	if (!status) { +		if (exception->retry) +			status = -EAGAIN; +		else +			status = err; +	}  out:  	dprintk("<-- %s\n", __func__);  	return status; @@ -8036,7 +8043,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)  		.flags = RPC_TASK_ASYNC,  	};  	struct pnfs_layout_segment *lseg = NULL; -	struct nfs4_exception exception = { .timeout = *timeout }; +	struct nfs4_exception exception = { +		.inode = inode, +		.timeout = *timeout, +	};  	int status = 0;  	dprintk("--> %s\n", __func__); @@ -8127,8 +8137,7 @@ static void nfs4_layoutreturn_release(void *calldata)  	spin_lock(&lo->plh_inode->i_lock);  	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,  			be32_to_cpu(lrp->args.stateid.seqid)); -	pnfs_mark_layout_returned_if_empty(lo); -	if (lrp->res.lrs_present) +	if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))  		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);  	pnfs_clear_layoutreturn_waitbit(lo);  	spin_unlock(&lo->plh_inode->i_lock); @@ -8833,7 +8842,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {  #endif  }; -ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) +static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)  {  	ssize_t error, error2; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9679f4749364..834b875900d6 100644 --- 
a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1488,9 +1488,9 @@ restart:  					}  					spin_unlock(&state->state_lock);  				} -				nfs4_put_open_state(state);  				clear_bit(NFS_STATE_RECLAIM_NOGRACE,  					&state->flags); +				nfs4_put_open_state(state);  				spin_lock(&sp->so_lock);  				goto restart;  			} diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9c150b153782..cfb8f7ce5cf6 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1235,8 +1235,8 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event,  				len = 0;  			__entry->error = error < 0 ? error : 0;  			__entry->id = id; -			memcpy(__get_dynamic_array(name), name, len); -			((char *)__get_dynamic_array(name))[len] = 0; +			memcpy(__get_str(name), name, len); +			__get_str(name)[len] = 0;  		),  		TP_printk( diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 661e753fe1c9..7bd3a5c09d31 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1985,9 +1985,14 @@ encode_layoutcommit(struct xdr_stream *xdr,  	p = xdr_encode_hyper(p, args->lastbytewritten + 1);	/* length */  	*p = cpu_to_be32(0); /* reclaim */  	encode_nfs4_stateid(xdr, &args->stateid); -	p = reserve_space(xdr, 20); -	*p++ = cpu_to_be32(1); /* newoffset = TRUE */ -	p = xdr_encode_hyper(p, args->lastbytewritten); +	if (args->lastbytewritten != U64_MAX) { +		p = reserve_space(xdr, 20); +		*p++ = cpu_to_be32(1); /* newoffset = TRUE */ +		p = xdr_encode_hyper(p, args->lastbytewritten); +	} else { +		p = reserve_space(xdr, 12); +		*p++ = cpu_to_be32(0); /* newoffset = FALSE */ +	}  	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */  	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 0b9e5cc9a747..2ca9167bc97d 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -37,7 +37,6 @@  			{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \  			{ 1 << NFS_INO_STALE, "STALE" }, \  			{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ -			{ 1 << 
NFS_INO_FLUSHING, "FLUSHING" }, \  			{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \  			{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \  			{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) @@ -707,9 +706,9 @@ TRACE_EVENT(nfs_sillyrename_unlink,  			__entry->dev = dir->i_sb->s_dev;  			__entry->dir = NFS_FILEID(dir);  			__entry->error = error; -			memcpy(__get_dynamic_array(name), +			memcpy(__get_str(name),  				data->args.name.name, len); -			((char *)__get_dynamic_array(name))[len] = 0; +			__get_str(name)[len] = 0;  		),  		TP_printk( diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0c7e0d45a4de..70806cae0d36 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -259,7 +259,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)   * is required.   * Note that caller must hold inode->i_lock.   */ -static int +int  pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,  		struct list_head *lseg_list)  { @@ -334,14 +334,17 @@ pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)  }  static void -init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) +pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, +		const struct pnfs_layout_range *range, +		const nfs4_stateid *stateid)  {  	INIT_LIST_HEAD(&lseg->pls_list);  	INIT_LIST_HEAD(&lseg->pls_lc_list);  	atomic_set(&lseg->pls_refcount, 1); -	smp_mb();  	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);  	lseg->pls_layout = lo; +	lseg->pls_range = *range; +	lseg->pls_seq = be32_to_cpu(stateid->seqid);  }  static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) @@ -361,8 +364,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,  	list_del_init(&lseg->pls_list);  	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */  	atomic_dec(&lo->plh_refcount); -	if (list_empty(&lo->plh_segs)) +	if (list_empty(&lo->plh_segs)) { +		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);  		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); +	}  	
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);  } @@ -484,15 +489,6 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,  	       (end2 == NFS4_MAX_UINT64 || end2 > start1);  } -static bool -should_free_lseg(const struct pnfs_layout_range *lseg_range, -		 const struct pnfs_layout_range *recall_range) -{ -	return (recall_range->iomode == IOMODE_ANY || -		lseg_range->iomode == recall_range->iomode) && -	       pnfs_lseg_range_intersecting(lseg_range, recall_range); -} -  static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,  		struct list_head *tmp_list)  { @@ -531,6 +527,27 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2)  	return (s32)(s1 - s2) > 0;  } +static bool +pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, +		 const struct pnfs_layout_range *recall_range) +{ +	return (recall_range->iomode == IOMODE_ANY || +		lseg_range->iomode == recall_range->iomode) && +	       pnfs_lseg_range_intersecting(lseg_range, recall_range); +} + +static bool +pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg, +		const struct pnfs_layout_range *recall_range, +		u32 seq) +{ +	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq)) +		return false; +	if (recall_range == NULL) +		return true; +	return pnfs_should_free_range(&lseg->pls_range, recall_range); +} +  /**   * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later   * @lo: layout header containing the lsegs @@ -560,10 +577,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,  	if (list_empty(&lo->plh_segs))  		return 0;  	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) -		if (!recall_range || -		    should_free_lseg(&lseg->pls_range, recall_range)) { -			if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq)) -				continue; +		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {  			dprintk("%s: freeing lseg %p iomode %d seq %u"  				"offset %llu length %llu\n", __func__,  				lseg, lseg->pls_range.iomode, 
lseg->pls_seq, @@ -759,24 +773,25 @@ void  pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,  			bool update_barrier)  { -	u32 oldseq, newseq, new_barrier; -	int empty = list_empty(&lo->plh_segs); +	u32 oldseq, newseq, new_barrier = 0; +	bool invalid = !pnfs_layout_is_valid(lo);  	oldseq = be32_to_cpu(lo->plh_stateid.seqid);  	newseq = be32_to_cpu(new->seqid); -	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { +	if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {  		nfs4_stateid_copy(&lo->plh_stateid, new); -		if (update_barrier) { -			new_barrier = be32_to_cpu(new->seqid); -		} else { -			/* Because of wraparound, we want to keep the barrier -			 * "close" to the current seqids. -			 */ -			new_barrier = newseq - atomic_read(&lo->plh_outstanding); -		} -		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) -			lo->plh_barrier = new_barrier; +		/* +		 * Because of wraparound, we want to keep the barrier +		 * "close" to the current seqids. +		 */ +		new_barrier = newseq - atomic_read(&lo->plh_outstanding);  	} +	if (update_barrier) +		new_barrier = be32_to_cpu(new->seqid); +	else if (new_barrier == 0) +		return; +	if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) +		lo->plh_barrier = new_barrier;  }  static bool @@ -871,15 +886,37 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)  	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);  } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ +	lo->plh_return_iomode = 0; +	lo->plh_return_seq = 0; +	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} +  static bool -pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) +pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, +		nfs4_stateid *stateid, +		enum pnfs_iomode *iomode)  {  	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))  		return false; -	lo->plh_return_iomode = 0; -	lo->plh_return_seq = 0;  	pnfs_get_layout_hdr(lo); -	
clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { +		if (stateid != NULL) { +			nfs4_stateid_copy(stateid, &lo->plh_stateid); +			if (lo->plh_return_seq != 0) +				stateid->seqid = cpu_to_be32(lo->plh_return_seq); +		} +		if (iomode != NULL) +			*iomode = lo->plh_return_iomode; +		pnfs_clear_layoutreturn_info(lo); +		return true; +	} +	if (stateid != NULL) +		nfs4_stateid_copy(stateid, &lo->plh_stateid); +	if (iomode != NULL) +		*iomode = IOMODE_ANY;  	return true;  } @@ -947,10 +984,7 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)  		enum pnfs_iomode iomode;  		bool send; -		nfs4_stateid_copy(&stateid, &lo->plh_stateid); -		stateid.seqid = cpu_to_be32(lo->plh_return_seq); -		iomode = lo->plh_return_iomode; -		send = pnfs_prepare_layoutreturn(lo); +		send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);  		spin_unlock(&inode->i_lock);  		if (send) {  			/* Send an async layoutreturn so we dont deadlock */ @@ -987,7 +1021,6 @@ _pnfs_return_layout(struct inode *ino)  		dprintk("NFS: %s no layout to return\n", __func__);  		goto out;  	} -	nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);  	/* Reference matched in nfs4_layoutreturn_release */  	pnfs_get_layout_hdr(lo);  	empty = list_empty(&lo->plh_segs); @@ -1010,8 +1043,7 @@ _pnfs_return_layout(struct inode *ino)  		goto out_put_layout_hdr;  	} -	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); -	send = pnfs_prepare_layoutreturn(lo); +	send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);  	spin_unlock(&ino->i_lock);  	pnfs_free_lseg_list(&tmp_list);  	if (send) @@ -1078,11 +1110,10 @@ bool pnfs_roc(struct inode *ino)  			goto out_noroc;  	} -	nfs4_stateid_copy(&stateid, &lo->plh_stateid);  	/* always send layoutreturn if being marked so */ -	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED, -				   &lo->plh_flags)) -		layoutreturn = pnfs_prepare_layoutreturn(lo); +	if 
(test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) +		layoutreturn = pnfs_prepare_layoutreturn(lo, +				&stateid, NULL);  	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)  		/* If we are sending layoutreturn, invalidate all valid lsegs */ @@ -1130,7 +1161,6 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)  	spin_lock(&ino->i_lock);  	lo = NFS_I(ino)->layout; -	pnfs_mark_layout_returned_if_empty(lo);  	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))  		lo->plh_barrier = barrier;  	spin_unlock(&ino->i_lock); @@ -1290,6 +1320,7 @@ alloc_init_layout_hdr(struct inode *ino,  	INIT_LIST_HEAD(&lo->plh_bulk_destroy);  	lo->plh_inode = ino;  	lo->plh_lc_cred = get_rpccred(ctx->cred); +	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;  	return lo;  } @@ -1297,6 +1328,8 @@ static struct pnfs_layout_hdr *  pnfs_find_alloc_layout(struct inode *ino,  		       struct nfs_open_context *ctx,  		       gfp_t gfp_flags) +	__releases(&ino->i_lock) +	__acquires(&ino->i_lock)  {  	struct nfs_inode *nfsi = NFS_I(ino);  	struct pnfs_layout_hdr *new = NULL; @@ -1500,7 +1533,7 @@ pnfs_update_layout(struct inode *ino,  	struct pnfs_layout_segment *lseg = NULL;  	nfs4_stateid stateid;  	long timeout = 0; -	unsigned long giveup = jiffies + rpc_get_timeout(server->client); +	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);  	bool first;  	if (!pnfs_enabled_sb(NFS_SERVER(ino))) { @@ -1565,8 +1598,7 @@ lookup_again:  	 * stateid, or it has been invalidated, then we must use the open  	 * stateid.  	 */ -	if (lo->plh_stateid.seqid == 0 || -	    test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { +	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {  		/*  		 * The first layoutget for the file. 
Need to serialize per @@ -1641,33 +1673,44 @@ lookup_again:  	lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);  	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,  				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); +	atomic_dec(&lo->plh_outstanding);  	if (IS_ERR(lseg)) {  		switch(PTR_ERR(lseg)) { -		case -ERECALLCONFLICT: +		case -EBUSY:  			if (time_after(jiffies, giveup))  				lseg = NULL; -			/* Fallthrough */ -		case -EAGAIN: -			pnfs_put_layout_hdr(lo); -			if (first) -				pnfs_clear_first_layoutget(lo); -			if (lseg) { -				trace_pnfs_update_layout(ino, pos, count, -					iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); -				goto lookup_again; +			break; +		case -ERECALLCONFLICT: +			/* Huh? We hold no layouts, how is there a recall? */ +			if (first) { +				lseg = NULL; +				break;  			} +			/* Destroy the existing layout and start over */ +			if (time_after(jiffies, giveup)) +				pnfs_destroy_layout(NFS_I(ino));  			/* Fallthrough */ +		case -EAGAIN: +			break;  		default:  			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {  				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));  				lseg = NULL;  			} +			goto out_put_layout_hdr; +		} +		if (lseg) { +			if (first) +				pnfs_clear_first_layoutget(lo); +			trace_pnfs_update_layout(ino, pos, count, +				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); +			pnfs_put_layout_hdr(lo); +			goto lookup_again;  		}  	} else {  		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));  	} -	atomic_dec(&lo->plh_outstanding);  out_put_layout_hdr:  	if (first)  		pnfs_clear_first_layoutget(lo); @@ -1731,9 +1774,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)  		return lseg;  	} -	init_lseg(lo, lseg); -	lseg->pls_range = res->range; -	lseg->pls_seq = be32_to_cpu(res->stateid.seqid); +	pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);  	spin_lock(&ino->i_lock);  	if (pnfs_layoutgets_blocked(lo)) { @@ -1754,16 +1795,19 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)  		 * inode 
invalid, and don't bother validating the stateid  		 * sequence number.  		 */ -		pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0); +		pnfs_mark_layout_stateid_invalid(lo, &free_me);  		nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);  		lo->plh_barrier = be32_to_cpu(res->stateid.seqid);  	} -	clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); -  	pnfs_get_lseg(lseg);  	pnfs_layout_insert_lseg(lo, lseg, &free_me); +	if (!pnfs_layout_is_valid(lo)) { +		pnfs_clear_layoutreturn_info(lo); +		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); +	} +  	if (res->return_on_close)  		set_bit(NFS_LSEG_ROC, &lseg->pls_flags); @@ -1783,14 +1827,14 @@ static void  pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,  			 u32 seq)  { -	if (lo->plh_return_iomode == iomode) -		return; -	if (lo->plh_return_iomode != 0) +	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)  		iomode = IOMODE_ANY;  	lo->plh_return_iomode = iomode;  	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) +	if (seq != 0) { +		WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);  		lo->plh_return_seq = seq; +	}  }  /** @@ -1820,7 +1864,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,  	assert_spin_locked(&lo->plh_inode->i_lock);  	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) -		if (should_free_lseg(&lseg->pls_range, return_range)) { +		if (pnfs_match_lseg_recall(lseg, return_range, seq)) {  			dprintk("%s: marking lseg %p iomode %d "  				"offset %llu length %llu\n", __func__,  				lseg, lseg->pls_range.iomode, @@ -1851,19 +1895,17 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,  	bool return_now = false;  	spin_lock(&inode->i_lock); -	pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq); +	pnfs_set_plh_return_info(lo, range.iomode, 0);  	/*  	 * mark all matching lsegs so that we are sure to have no live  	 * 
segments at hand when sending layoutreturn. See pnfs_put_lseg()  	 * for how it works.  	 */ -	if (!pnfs_mark_matching_lsegs_return(lo, &free_me, -						&range, lseg->pls_seq)) { +	if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) {  		nfs4_stateid stateid; -		enum pnfs_iomode iomode = lo->plh_return_iomode; +		enum pnfs_iomode iomode; -		nfs4_stateid_copy(&stateid, &lo->plh_stateid); -		return_now = pnfs_prepare_layoutreturn(lo); +		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);  		spin_unlock(&inode->i_lock);  		if (return_now)  			pnfs_send_layoutreturn(lo, &stateid, iomode, false); @@ -2378,7 +2420,10 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)  	nfs_fattr_init(&data->fattr);  	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;  	data->res.fattr = &data->fattr; -	data->args.lastbytewritten = end_pos - 1; +	if (end_pos != 0) +		data->args.lastbytewritten = end_pos - 1; +	else +		data->args.lastbytewritten = U64_MAX;  	data->res.server = NFS_SERVER(inode);  	if (ld->prepare_layoutcommit) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b21bd0bee784..31d99b2927b0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -268,6 +268,8 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,  				struct list_head *tmp_list,  				const struct pnfs_layout_range *recall_range,  				u32 seq); +int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, +		struct list_head *lseg_list);  bool pnfs_roc(struct inode *ino);  void pnfs_roc_release(struct inode *ino);  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); @@ -375,6 +377,11 @@ static inline bool nfs_have_layout(struct inode *inode)  	return NFS_I(inode)->layout != NULL;  } +static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo) +{ +	return test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) == 0; +} +  static inline struct nfs4_deviceid_node *  nfs4_get_deviceid(struct nfs4_deviceid_node *d)  { @@ -545,19 +552,6 @@ 
pnfs_calc_offset_length(u64 offset, u64 end)  	return 1 + end - offset;  } -/** - * pnfs_mark_layout_returned_if_empty - marks the layout as returned - * @lo: layout header - * - * Note: Caller must hold inode->i_lock - */ -static inline void -pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo) -{ -	if (list_empty(&lo->plh_segs)) -		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); -} -  static inline void  pnfs_copy_range(struct pnfs_layout_range *dst,  		const struct pnfs_layout_range *src) @@ -629,6 +623,13 @@ pnfs_sync_inode(struct inode *inode, bool datasync)  }  static inline bool +pnfs_layoutcommit_outstanding(struct inode *inode) +{ +	return false; +} + + +static inline bool  pnfs_roc(struct inode *ino)  {  	return false; @@ -716,13 +717,6 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,  	return false;  } -static inline bool -pnfs_layoutcommit_outstanding(struct inode *inode) -{ -	return false; -} - -  static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)  {  	return NULL; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 0dfc476da3e1..f3468b57a32a 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,  }  /* Helper function for pnfs_generic_commit_pagelist to catch an empty - * page list. This can happen when two commits race. */ + * page list. This can happen when two commits race. + * + * This must be called instead of nfs_init_commit - call one or the other, but + * not both! 
+ */  static bool  pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,  					  struct nfs_commit_data *data, @@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,  	if (list_empty(pages)) {  		if (atomic_dec_and_test(&cinfo->mds->rpcs_out))  			wake_up_atomic_t(&cinfo->mds->rpcs_out); -		nfs_commitdata_release(data); +		/* don't call nfs_commitdata_release - it tries to put +		 * the open_context which is not acquired until nfs_init_commit +		 * which has not been called on @data */ +		WARN_ON_ONCE(data->context); +		nfs_commit_free(data);  		return true;  	} @@ -587,7 +595,7 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)  }  static struct nfs_client *(*get_v3_ds_connect)( -			struct nfs_client *mds_clp, +			struct nfs_server *mds_srv,  			const struct sockaddr *ds_addr,  			int ds_addrlen,  			int ds_proto, @@ -646,7 +654,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,  			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,  					rpc_clnt_test_and_add_xprt, NULL);  		} else -			clp = get_v3_ds_connect(mds_srv->nfs_client, +			clp = get_v3_ds_connect(mds_srv,  					(struct sockaddr *)&da->da_addr,  					da->da_addrlen, IPPROTO_TCP,  					timeo, retrans, au_flavor); @@ -682,7 +690,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,  		dprintk("%s: DS %s: trying address %s\n",  			__func__, ds->ds_remotestr, da->da_remotestr); -		clp = nfs4_set_ds_client(mds_srv->nfs_client, +		clp = nfs4_set_ds_client(mds_srv,  					(struct sockaddr *)&da->da_addr,  					da->da_addrlen, IPPROTO_TCP,  					timeo, retrans, minor_version, @@ -932,6 +940,13 @@ EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);  int  pnfs_nfs_generic_sync(struct inode *inode, bool datasync)  { +	int ret; + +	if (!pnfs_layoutcommit_outstanding(inode)) +		return 0; +	ret = nfs_commit_inode(inode, FLUSH_SYNC); +	if (ret < 0) +		return ret;  	if (datasync)  		return 0;  	return pnfs_layoutcommit_inode(inode, 
true); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6776d7a7839e..572e5b3b06f1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)  		nfs_list_remove_request(new);  		nfs_readpage_release(new);  		error = desc->pgio->pg_error; -		goto out_unlock; +		goto out;  	}  	return 0;  out_error:  	error = PTR_ERR(new); -out_unlock:  	unlock_page(page); +out:  	return error;  } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2137e0202f25..18d446e1a82b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1684,6 +1684,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,  {  	rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;  	unsigned int i; +	int use_auth_null = false;  	/*  	 * If the sec= mount option is used, the specified flavor or AUTH_NULL @@ -1691,14 +1692,21 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,  	 *  	 * AUTH_NULL has a special meaning when it's in the server list - it  	 * means that the server will ignore the rpc creds, so any flavor -	 * can be used. +	 * can be used but still use the sec= that was specified.  	 
*/  	for (i = 0; i < count; i++) {  		flavor = server_authlist[i]; -		if (nfs_auth_info_match(&args->auth_info, flavor) || -		    flavor == RPC_AUTH_NULL) +		if (nfs_auth_info_match(&args->auth_info, flavor))  			goto out; + +		if (flavor == RPC_AUTH_NULL) +			use_auth_null = true; +	} + +	if (use_auth_null) { +		flavor = RPC_AUTH_NULL; +		goto out;  	}  	dfprintk(MOUNT, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1c74d3db64d..3a6724c6eb5f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -625,7 +625,7 @@ static int nfs_writepage_locked(struct page *page,  	int err;  	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); -	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), +	nfs_pageio_init_write(&pgio, inode, 0,  				false, &nfs_async_write_completion_ops);  	err = nfs_do_writepage(page, wbc, &pgio, launder);  	nfs_pageio_complete(&pgio); @@ -657,16 +657,9 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *  int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)  {  	struct inode *inode = mapping->host; -	unsigned long *bitlock = &NFS_I(inode)->flags;  	struct nfs_pageio_descriptor pgio;  	int err; -	/* Stop dirtying of new pages while we sync */ -	err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING, -			nfs_wait_bit_killable, TASK_KILLABLE); -	if (err) -		goto out_err; -  	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);  	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, @@ -674,10 +667,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)  	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);  	nfs_pageio_complete(&pgio); -	clear_bit_unlock(NFS_INO_FLUSHING, bitlock); -	smp_mb__after_atomic(); -	wake_up_bit(bitlock, NFS_INO_FLUSHING); -  	if (err < 0)  		goto out_err;  	err = pgio.pg_error; @@ -898,7 +887,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,  static void  nfs_clear_page_commit(struct page *page)  
{ -	dec_zone_page_state(page, NR_UNSTABLE_NFS); +	dec_node_page_state(page, NR_UNSTABLE_NFS);  	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,  		    WB_RECLAIMABLE);  } @@ -1195,9 +1184,11 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode)  /*   * Test if the open context credential key is marked to expire soon.   */ -bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx) +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)  { -	return rpcauth_cred_key_to_expire(ctx->cred); +	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + +	return rpcauth_cred_key_to_expire(auth, ctx->cred);  }  /* @@ -1289,6 +1280,9 @@ int nfs_updatepage(struct file *file, struct page *page,  	dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",  		file, count, (long long)(page_file_offset(page) + offset)); +	if (!count) +		goto out; +  	if (nfs_can_extend_write(file, page, inode)) {  		count = max(count + offset, nfs_page_length(page));  		offset = 0; @@ -1299,7 +1293,7 @@ int nfs_updatepage(struct file *file, struct page *page,  		nfs_set_pageerror(page);  	else  		__set_page_dirty_nobuffers(page); - +out:  	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",  			status, (long long)i_size_read(inode));  	return status; @@ -1800,7 +1794,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)  		/* Okay, COMMIT succeeded, apparently. Check the verifier  		 * returned by the server against all stored verfs. */ -		if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) { +		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {  			/* We have a match */  			nfs_inode_remove_request(req);  			dprintk(" OK\n"); @@ -1924,6 +1918,24 @@ out_mark_dirty:  EXPORT_SYMBOL_GPL(nfs_write_inode);  /* + * Wrapper for filemap_write_and_wait_range() + * + * Needed for pNFS in order to ensure data becomes visible to the + * client. 
+ */ +int nfs_filemap_write_and_wait_range(struct address_space *mapping, +		loff_t lstart, loff_t lend) +{ +	int ret; + +	ret = filemap_write_and_wait_range(mapping, lstart, lend); +	if (ret == 0) +		ret = pnfs_sync_inode(mapping->host, true); +	return ret; +} +EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range); + +/*   * flush the inode to disk.   */  int nfs_wb_all(struct inode *inode)  |