diff options
35 files changed, 1844 insertions, 1159 deletions
| diff --git a/fs/Kconfig b/fs/Kconfig index 313b2e06ded5..84ab76a206a0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1544,10 +1544,6 @@ config UFS_FS            The recently released UFS2 variant (used in FreeBSD 5.x) is            READ-ONLY supported. -	  If you only intend to mount files from some other Unix over the -	  network using NFS, you don't need the UFS file system support (but -	  you need NFS file system support obviously). -  	  Note that this option is generally not needed for floppies, since a  	  good portable way to transport files and directories between unixes  	  (and even other operating systems) is given by the tar program ("man @@ -1587,6 +1583,7 @@ menuconfig NETWORK_FILESYSTEMS  	  Say Y here to get to see options for network filesystems and  	  filesystem-related networking code, such as NFS daemon and  	  RPCSEC security modules. +  	  This option alone does not add any kernel code.  	  If you say N, all options in this submenu will be skipped and @@ -1595,76 +1592,92 @@ menuconfig NETWORK_FILESYSTEMS  if NETWORK_FILESYSTEMS  config NFS_FS -	tristate "NFS file system support" +	tristate "NFS client support"  	depends on INET  	select LOCKD  	select SUNRPC  	select NFS_ACL_SUPPORT if NFS_V3_ACL  	help -	  If you are connected to some other (usually local) Unix computer -	  (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing -	  on that computer (the NFS server) using the Network File Sharing -	  protocol, say Y. "Mounting files" means that the client can access -	  the files with usual UNIX commands as if they were sitting on the -	  client's hard disk. For this to work, the server must run the -	  programs nfsd and mountd (but does not need to have NFS file system -	  support enabled in its kernel). NFS is explained in the Network -	  Administrator's Guide, available from -	  <http://www.tldp.org/docs.html#guide>, on its man page: "man -	  nfs", and in the NFS-HOWTO. - -	  A superior but less widely used alternative to NFS is provided by -	  the Coda file system; see "Coda file system support" below. +	  Choose Y here if you want to access files residing on other +	  computers using Sun's Network File System protocol.  To compile +	  this file system support as a module, choose M here: the module +	  will be called nfs. -	  If you say Y here, you should have said Y to TCP/IP networking also. -	  This option would enlarge your kernel by about 27 KB. +	  To mount file systems exported by NFS servers, you also need to +	  install the user space mount.nfs command which can be found in +	  the Linux nfs-utils package, available from http://linux-nfs.org/. +	  Information about using the mount command is available in the +	  mount(8) man page.  More detail about the Linux NFS client +	  implementation is available via the nfs(5) man page. -	  To compile this file system support as a module, choose M here: the -	  module will be called nfs. +	  Below you can choose which versions of the NFS protocol are +	  available in the kernel to mount NFS servers.  Support for NFS +	  version 2 (RFC 1094) is always available when NFS_FS is selected. -	  If you are configuring a diskless machine which will mount its root -	  file system over NFS at boot time, say Y here and to "Kernel -	  level IP autoconfiguration" above and to "Root file system on NFS" -	  below. You cannot compile this driver as a module in this case. -	  There are two packages designed for booting diskless machines over -	  the net: netboot, available from -	  <http://ftp1.sourceforge.net/netboot/>, and Etherboot, -	  available from <http://ftp1.sourceforge.net/etherboot/>. +	  To configure a system which mounts its root file system via NFS +	  at boot time, say Y here, select "Kernel level IP +	  autoconfiguration" in the NETWORK menu, and select "Root file +	  system on NFS" below.  You cannot compile this file system as a +	  module in this case. -	  If you don't know what all this is about, say N. +	  If unsure, say N.  config NFS_V3 -	bool "Provide NFSv3 client support" +	bool "NFS client support for NFS version 3"  	depends on NFS_FS  	help -	  Say Y here if you want your NFS client to be able to speak version -	  3 of the NFS protocol. +	  This option enables support for version 3 of the NFS protocol +	  (RFC 1813) in the kernel's NFS client.  	  If unsure, say Y.  config NFS_V3_ACL -	bool "Provide client support for the NFSv3 ACL protocol extension" +	bool "NFS client support for the NFSv3 ACL protocol extension"  	depends on NFS_V3  	help -	  Implement the NFSv3 ACL protocol extension for manipulating POSIX -	  Access Control Lists.  The server should also be compiled with -	  the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. +	  Some NFS servers support an auxiliary NFSv3 ACL protocol that +	  Sun added to Solaris but never became an official part of the +	  NFS version 3 protocol.  This protocol extension allows +	  applications on NFS clients to manipulate POSIX Access Control +	  Lists on files residing on NFS servers.  NFS servers enforce +	  ACLs on local files whether this protocol is available or not. + +	  Choose Y here if your NFS server supports the Solaris NFSv3 ACL +	  protocol extension and you want your NFS client to allow +	  applications to access and modify ACLs on files on the server. + +	  Most NFS servers don't support the Solaris NFSv3 ACL protocol +	  extension.  You can choose N here or specify the "noacl" mount +	  option to prevent your NFS client from trying to use the NFSv3 +	  ACL protocol.  	  If unsure, say N.  config NFS_V4 -	bool "Provide NFSv4 client support (EXPERIMENTAL)" +	bool "NFS client support for NFS version 4 (EXPERIMENTAL)"  	depends on NFS_FS && EXPERIMENTAL  	select RPCSEC_GSS_KRB5  	help -	  Say Y here if you want your NFS client to be able to speak the newer -	  version 4 of the NFS protocol. +	  This option enables support for version 4 of the NFS protocol +	  (RFC 3530) in the kernel's NFS client. -	  Note: Requires auxiliary userspace daemons which may be found on -		http://www.citi.umich.edu/projects/nfsv4/ +	  To mount NFS servers using NFSv4, you also need to install user +	  space programs which can be found in the Linux nfs-utils package, +	  available from http://linux-nfs.org/.  	  If unsure, say N. +config ROOT_NFS +	bool "Root file system on NFS" +	depends on NFS_FS=y && IP_PNP +	help +	  If you want your system to mount its root file system via NFS, +	  choose Y here.  This is common practice for managing systems +	  without local permanent storage.  For details, read +	  <file:Documentation/filesystems/nfsroot.txt>. + +	  Most people say N here. +  config NFSD  	tristate "NFS server support"  	depends on INET @@ -1746,20 +1759,6 @@ config NFSD_V4  	  If unsure, say N. -config ROOT_NFS -	bool "Root file system on NFS" -	depends on NFS_FS=y && IP_PNP -	help -	  If you want your Linux box to mount its whole root file system (the -	  one containing the directory /) from some other computer over the -	  net via NFS (presumably because your box doesn't have a hard disk), -	  say Y. Read <file:Documentation/filesystems/nfsroot.txt> for -	  details. It is likely that in this case, you also want to say Y to -	  "Kernel level IP autoconfiguration" so that your box can discover -	  its network address at boot time. - -	  Most people say N here. -  config LOCKD  	tristate @@ -1800,27 +1799,6 @@ config SUNRPC_XPRT_RDMA  	  If unsure, say N. -config SUNRPC_BIND34 -	bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" -	depends on SUNRPC && EXPERIMENTAL -	default n -	help -	  RPC requests over IPv6 networks require support for larger -	  addresses when performing an RPC bind.  Sun added support for -	  IPv6 addressing by creating two new versions of the rpcbind -	  protocol (RFC 1833). - -	  This option enables support in the kernel RPC client for -	  querying rpcbind servers via versions 3 and 4 of the rpcbind -	  protocol.  The kernel automatically falls back to version 2 -	  if a remote rpcbind service does not support versions 3 or 4. -	  By themselves, these new versions do not provide support for -	  RPC over IPv6, but the new protocol versions are necessary to -	  support it. - -	  If unsure, say N to get traditional behavior (version 2 rpcbind -	  requests only). -  config RPCSEC_GSS_KRB5  	tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"  	depends on SUNRPC && EXPERIMENTAL diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5df517b81f3f..fd7d4669776e 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -430,7 +430,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)  			 * Report the conflicting lock back to the application.  			 */  			fl->fl_start = req->a_res.lock.fl.fl_start; -			fl->fl_end = req->a_res.lock.fl.fl_start; +			fl->fl_end = req->a_res.lock.fl.fl_end;  			fl->fl_type = req->a_res.lock.fl.fl_type;  			fl->fl_pid = 0;  			break; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c1e7c8300629..f447f4b4476c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -27,7 +27,7 @@  struct nfs_callback_data {  	unsigned int users; -	struct svc_serv *serv; +	struct svc_rqst *rqst;  	struct task_struct *task;  }; @@ -91,21 +91,17 @@ nfs_callback_svc(void *vrqstp)  		svc_process(rqstp);  	}  	unlock_kernel(); -	nfs_callback_info.task = NULL; -	svc_exit_thread(rqstp);  	return 0;  }  /* - * Bring up the server process if it is not already up. + * Bring up the callback thread if it is not already up.   */  int nfs_callback_up(void)  {  	struct svc_serv *serv = NULL; -	struct svc_rqst *rqstp;  	int ret = 0; -	lock_kernel();  	mutex_lock(&nfs_callback_mutex);  	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)  		goto out; @@ -121,22 +117,23 @@ int nfs_callback_up(void)  	nfs_callback_tcpport = ret;  	dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); -	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); -	if (IS_ERR(rqstp)) { -		ret = PTR_ERR(rqstp); +	nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); +	if (IS_ERR(nfs_callback_info.rqst)) { +		ret = PTR_ERR(nfs_callback_info.rqst); +		nfs_callback_info.rqst = NULL;  		goto out_err;  	}  	svc_sock_update_bufs(serv); -	nfs_callback_info.serv = serv; -	nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, +	nfs_callback_info.task = kthread_run(nfs_callback_svc, +					     nfs_callback_info.rqst,  					     "nfsv4-svc");  	if (IS_ERR(nfs_callback_info.task)) {  		ret = PTR_ERR(nfs_callback_info.task); -		nfs_callback_info.serv = NULL; +		svc_exit_thread(nfs_callback_info.rqst); +		nfs_callback_info.rqst = NULL;  		nfs_callback_info.task = NULL; -		svc_exit_thread(rqstp);  		goto out_err;  	}  out: @@ -149,7 +146,6 @@ out:  	if (serv)  		svc_destroy(serv);  	mutex_unlock(&nfs_callback_mutex); -	unlock_kernel();  	return ret;  out_err:  	dprintk("Couldn't create callback socket or server thread; err = %d\n", @@ -159,17 +155,19 @@ out_err:  }  /* - * Kill the server process if it is not already down. + * Kill the callback thread if it's no longer being used.   */  void nfs_callback_down(void)  { -	lock_kernel();  	mutex_lock(&nfs_callback_mutex);  	nfs_callback_info.users--; -	if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) +	if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) {  		kthread_stop(nfs_callback_info.task); +		svc_exit_thread(nfs_callback_info.rqst); +		nfs_callback_info.rqst = NULL; +		nfs_callback_info.task = NULL; +	}  	mutex_unlock(&nfs_callback_mutex); -	unlock_kernel();  }  static int nfs_callback_authenticate(struct svc_rqst *rqstp) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2a092ca69b5..5ee23e7058b3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -431,14 +431,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,  {  	to->to_initval = timeo * HZ / 10;  	to->to_retries = retrans; -	if (!to->to_retries) -		to->to_retries = 2;  	switch (proto) {  	case XPRT_TRANSPORT_TCP:  	case XPRT_TRANSPORT_RDMA: +		if (to->to_retries == 0) +			to->to_retries = NFS_DEF_TCP_RETRANS;  		if (to->to_initval == 0) -			to->to_initval = 60 * HZ; +			to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;  		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)  			to->to_initval = NFS_MAX_TCP_TIMEOUT;  		to->to_increment = to->to_initval; @@ -450,14 +450,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,  		to->to_exponential = 0;  		break;  	case XPRT_TRANSPORT_UDP: -	default: +		if (to->to_retries == 0) +			to->to_retries = NFS_DEF_UDP_RETRANS;  		if (!to->to_initval) -			to->to_initval = 11 * HZ / 10; +			to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;  		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)  			to->to_initval = NFS_MAX_UDP_TIMEOUT;  		to->to_maxval = NFS_MAX_UDP_TIMEOUT;  		to->to_exponential = 1;  		break; +	default: +		BUG();  	}  } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 982a2064fe4c..b1940660502f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,8 +133,11 @@ nfs_opendir(struct inode *inode, struct file *filp)  {  	int res; -	dfprintk(VFS, "NFS: opendir(%s/%ld)\n", -			inode->i_sb->s_id, inode->i_ino); +	dfprintk(FILE, "NFS: open dir(%s/%s)\n", +			filp->f_path.dentry->d_parent->d_name.name, +			filp->f_path.dentry->d_name.name); + +	nfs_inc_stats(inode, NFSIOS_VFSOPEN);  	lock_kernel();  	/* Call generic open code in order to cache credentials */ @@ -528,7 +531,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)  	struct nfs_fattr fattr;  	long		res; -	dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", +	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",  			dentry->d_parent->d_name.name, dentry->d_name.name,  			(long long)filp->f_pos);  	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); @@ -595,7 +598,7 @@ out:  	unlock_kernel();  	if (res > 0)  		res = 0; -	dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", +	dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",  			dentry->d_parent->d_name.name, dentry->d_name.name,  			res);  	return res; @@ -603,7 +606,15 @@ out:  static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)  { -	mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); +	struct dentry *dentry = filp->f_path.dentry; +	struct inode *inode = dentry->d_inode; + +	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", +			dentry->d_parent->d_name.name, +			dentry->d_name.name, +			offset, origin); + +	mutex_lock(&inode->i_mutex);  	switch (origin) {  		case 1:  			offset += filp->f_pos; @@ -619,7 +630,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)  		nfs_file_open_context(filp)->dir_cookie = 0;  	}  out: -	mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); +	mutex_unlock(&inode->i_mutex);  	return offset;  } @@ -629,10 +640,11 @@ out:   */  static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)  { -	dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", +	dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",  			dentry->d_parent->d_name.name, dentry->d_name.name,  			datasync); +	nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);  	return 0;  } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4757a2b326a1..08f6b040d289 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -890,7 +890,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,  	count = iov_length(iov, nr_segs);  	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); -	dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n", +	dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",  		file->f_path.dentry->d_parent->d_name.name,  		file->f_path.dentry->d_name.name,  		count, (long long) pos); @@ -947,7 +947,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,  	count = iov_length(iov, nr_segs);  	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); -	dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n", +	dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",  		file->f_path.dentry->d_parent->d_name.name,  		file->f_path.dentry->d_name.name,  		count, (long long) pos); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4e98a56a1777..43164fe86069 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -50,7 +50,7 @@ static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,  static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,  				unsigned long nr_segs, loff_t pos);  static int  nfs_file_flush(struct file *, fl_owner_t id); -static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync); +static int  nfs_file_fsync(struct file *, struct dentry *dentry, int datasync);  static int nfs_check_flags(int flags);  static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -72,7 +72,7 @@ const struct file_operations nfs_file_operations = {  	.open		= nfs_file_open,  	.flush		= nfs_file_flush,  	.release	= nfs_file_release, -	.fsync		= nfs_fsync, +	.fsync		= nfs_file_fsync,  	.lock		= nfs_lock,  	.flock		= nfs_flock,  	.splice_read	= nfs_file_splice_read, @@ -119,13 +119,17 @@ nfs_file_open(struct inode *inode, struct file *filp)  {  	int res; +	dprintk("NFS: open file(%s/%s)\n", +			filp->f_path.dentry->d_parent->d_name.name, +			filp->f_path.dentry->d_name.name); +  	res = nfs_check_flags(filp->f_flags);  	if (res)  		return res;  	nfs_inc_stats(inode, NFSIOS_VFSOPEN);  	lock_kernel(); -	res = NFS_PROTO(inode)->file_open(inode, filp); +	res = nfs_open(inode, filp);  	unlock_kernel();  	return res;  } @@ -133,11 +137,17 @@ nfs_file_open(struct inode *inode, struct file *filp)  static int  nfs_file_release(struct inode *inode, struct file *filp)  { +	struct dentry *dentry = filp->f_path.dentry; + +	dprintk("NFS: release(%s/%s)\n", +			dentry->d_parent->d_name.name, +			dentry->d_name.name); +  	/* Ensure that dirty pages are flushed out with the right creds */  	if (filp->f_mode & FMODE_WRITE) -		nfs_wb_all(filp->f_path.dentry->d_inode); +		nfs_wb_all(dentry->d_inode);  	nfs_inc_stats(inode, NFSIOS_VFSRELEASE); -	return NFS_PROTO(inode)->file_release(inode, filp); +	return nfs_release(inode, filp);  }  /** @@ -171,6 +181,12 @@ force_reval:  static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)  {  	loff_t loff; + +	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", +			filp->f_path.dentry->d_parent->d_name.name, +			filp->f_path.dentry->d_name.name, +			offset, origin); +  	/* origin == SEEK_END => we must revalidate the cached file length */  	if (origin == SEEK_END) {  		struct inode *inode = filp->f_mapping->host; @@ -185,7 +201,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)  }  /* - * Helper for nfs_file_flush() and nfs_fsync() + * Helper for nfs_file_flush() and nfs_file_fsync()   *   * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to   * disk, but it retrieves and clears ctx->error after synching, despite @@ -211,16 +227,18 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)  /*   * Flush all dirty pages, and check for write errors. - *   */  static int  nfs_file_flush(struct file *file, fl_owner_t id)  {  	struct nfs_open_context *ctx = nfs_file_open_context(file); -	struct inode	*inode = file->f_path.dentry->d_inode; +	struct dentry	*dentry = file->f_path.dentry; +	struct inode	*inode = dentry->d_inode;  	int		status; -	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); +	dprintk("NFS: flush(%s/%s)\n", +			dentry->d_parent->d_name.name, +			dentry->d_name.name);  	if ((file->f_mode & FMODE_WRITE) == 0)  		return 0; @@ -245,7 +263,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,  	if (iocb->ki_filp->f_flags & O_DIRECT)  		return nfs_file_direct_read(iocb, iov, nr_segs, pos); -	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", +	dprintk("NFS: read(%s/%s, %lu@%lu)\n",  		dentry->d_parent->d_name.name, dentry->d_name.name,  		(unsigned long) count, (unsigned long) pos); @@ -265,7 +283,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,  	struct inode *inode = dentry->d_inode;  	ssize_t res; -	dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", +	dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",  		dentry->d_parent->d_name.name, dentry->d_name.name,  		(unsigned long) count, (unsigned long long) *ppos); @@ -282,7 +300,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)  	struct inode *inode = dentry->d_inode;  	int	status; -	dfprintk(VFS, "nfs: mmap(%s/%s)\n", +	dprintk("NFS: mmap(%s/%s)\n",  		dentry->d_parent->d_name.name, dentry->d_name.name);  	status = nfs_revalidate_mapping(inode, file->f_mapping); @@ -300,12 +318,14 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)   * whether any write errors occurred for this process.   */  static int -nfs_fsync(struct file *file, struct dentry *dentry, int datasync) +nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)  {  	struct nfs_open_context *ctx = nfs_file_open_context(file);  	struct inode *inode = dentry->d_inode; -	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); +	dprintk("NFS: fsync file(%s/%s) datasync %d\n", +			dentry->d_parent->d_name.name, dentry->d_name.name, +			datasync);  	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);  	return nfs_do_fsync(ctx, inode); @@ -328,6 +348,11 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,  	struct page *page;  	index = pos >> PAGE_CACHE_SHIFT; +	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", +		file->f_path.dentry->d_parent->d_name.name, +		file->f_path.dentry->d_name.name, +		mapping->host->i_ino, len, (long long) pos); +  	page = __grab_cache_page(mapping, index);  	if (!page)  		return -ENOMEM; @@ -348,6 +373,31 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,  	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);  	int status; +	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", +		file->f_path.dentry->d_parent->d_name.name, +		file->f_path.dentry->d_name.name, +		mapping->host->i_ino, len, (long long) pos); + +	/* +	 * Zero any uninitialised parts of the page, and then mark the page +	 * as up to date if it turns out that we're extending the file. +	 */ +	if (!PageUptodate(page)) { +		unsigned pglen = nfs_page_length(page); +		unsigned end = offset + len; + +		if (pglen == 0) { +			zero_user_segments(page, 0, offset, +					end, PAGE_CACHE_SIZE); +			SetPageUptodate(page); +		} else if (end >= pglen) { +			zero_user_segment(page, end, PAGE_CACHE_SIZE); +			if (offset == 0) +				SetPageUptodate(page); +		} else +			zero_user_segment(page, pglen, PAGE_CACHE_SIZE); +	} +  	lock_kernel();  	status = nfs_updatepage(file, page, offset, copied);  	unlock_kernel(); @@ -362,6 +412,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,  static void nfs_invalidate_page(struct page *page, unsigned long offset)  { +	dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); +  	if (offset != 0)  		return;  	/* Cancel any unstarted writes on this page */ @@ -370,13 +422,20 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)  static int nfs_release_page(struct page *page, gfp_t gfp)  { +	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); +  	/* If PagePrivate() is set, then the page is not freeable */  	return 0;  }  static int nfs_launder_page(struct page *page)  { -	return nfs_wb_page(page->mapping->host, page); +	struct inode *inode = page->mapping->host; + +	dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", +		inode->i_ino, (long long)page_offset(page)); + +	return nfs_wb_page(inode, page);  }  const struct address_space_operations nfs_file_aops = { @@ -396,13 +455,19 @@ const struct address_space_operations nfs_file_aops = {  static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)  {  	struct file *filp = vma->vm_file; +	struct dentry *dentry = filp->f_path.dentry;  	unsigned pagelen;  	int ret = -EINVAL;  	struct address_space *mapping; +	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", +		dentry->d_parent->d_name.name, dentry->d_name.name, +		filp->f_mapping->host->i_ino, +		(long long)page_offset(page)); +  	lock_page(page);  	mapping = page->mapping; -	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) +	if (mapping != dentry->d_inode->i_mapping)  		goto out_unlock;  	ret = 0; @@ -450,9 +515,9 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,  	if (iocb->ki_filp->f_flags & O_DIRECT)  		return nfs_file_direct_write(iocb, iov, nr_segs, pos); -	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", +	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",  		dentry->d_parent->d_name.name, dentry->d_name.name, -		inode->i_ino, (unsigned long) count, (long long) pos); +		(unsigned long) count, (long long) pos);  	result = -EBUSY;  	if (IS_SWAPFILE(inode)) @@ -586,7 +651,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)  	 * This makes locking act as a cache coherency point.  	 */  	nfs_sync_mapping(filp->f_mapping); -	nfs_zap_caches(inode); +	if (!nfs_have_delegation(inode, FMODE_READ)) +		nfs_zap_caches(inode);  out:  	return status;  } @@ -596,23 +662,35 @@ out:   */  static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)  { -	struct inode * inode = filp->f_mapping->host; +	struct inode *inode = filp->f_mapping->host; +	int ret = -ENOLCK; -	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", -			inode->i_sb->s_id, inode->i_ino, +	dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", +			filp->f_path.dentry->d_parent->d_name.name, +			filp->f_path.dentry->d_name.name,  			fl->fl_type, fl->fl_flags,  			(long long)fl->fl_start, (long long)fl->fl_end); +  	nfs_inc_stats(inode, NFSIOS_VFSLOCK);  	/* No mandatory locks over NFS */  	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) -		return -ENOLCK; +		goto out_err; + +	if (NFS_PROTO(inode)->lock_check_bounds != NULL) { +		ret = NFS_PROTO(inode)->lock_check_bounds(fl); +		if (ret < 0) +			goto out_err; +	}  	if (IS_GETLK(cmd)) -		return do_getlk(filp, cmd, fl); -	if (fl->fl_type == F_UNLCK) -		return do_unlk(filp, cmd, fl); -	return do_setlk(filp, cmd, fl); +		ret = do_getlk(filp, cmd, fl); +	else if (fl->fl_type == F_UNLCK) +		ret = do_unlk(filp, cmd, fl); +	else +		ret = do_setlk(filp, cmd, fl); +out_err: +	return ret;  }  /* @@ -620,9 +698,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)   */  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)  { -	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", -			filp->f_path.dentry->d_inode->i_sb->s_id, -			filp->f_path.dentry->d_inode->i_ino, +	dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", +			filp->f_path.dentry->d_parent->d_name.name, +			filp->f_path.dentry->d_name.name,  			fl->fl_type, fl->fl_flags);  	/* @@ -645,12 +723,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)  	return do_setlk(filp, cmd, fl);  } +/* + * There is no protocol support for leases, so we have no way to implement + * them correctly in the face of opens by other clients. + */  static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)  { -	/* -	 * There is no protocol support for leases, so we have no way -	 * to implement them correctly in the face of opens by other -	 * clients. -	 */ +	dprintk("NFS: setlease(%s/%s, arg=%ld)\n", +			file->f_path.dentry->d_parent->d_name.name, +			file->f_path.dentry->d_name.name, arg); +  	return -EINVAL;  } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 596c5d8e86f4..2c23d067e2a6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;  static void nfs_invalidate_inode(struct inode *);  static int nfs_update_inode(struct inode *, struct nfs_fattr *); -static void nfs_zap_acl_cache(struct inode *); -  static struct kmem_cache * nfs_inode_cachep;  static inline unsigned long @@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)  	}  } -static void nfs_zap_acl_cache(struct inode *inode) +void nfs_zap_acl_cache(struct inode *inode)  {  	void (*clear_acl_cache)(struct inode *); @@ -347,7 +345,7 @@ out_no_inode:  	goto out;  } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE)  int  nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -369,7 +367,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)  	/* Optimization: if the end result is no change, don't RPC */  	attr->ia_valid &= NFS_VALID_ATTRS; -	if (attr->ia_valid == 0) +	if ((attr->ia_valid & ~ATTR_FILE) == 0)  		return 0;  	lock_kernel(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04ae867dddba..24241fcbb98d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *);  #ifdef CONFIG_NFS_V4  extern void nfs4_clear_inode(struct inode *);  #endif +void nfs_zap_acl_cache(struct inode *inode);  /* super.c */  extern struct file_system_type nfs_xdev_fs_type; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 6350ecbde589..a36952810032 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -5,135 +5,41 @@   *   *  Copyright (C) 2005, 2006 Chuck Lever <[email protected]>   * - *  NFS client per-mount statistics provide information about the health of - *  the NFS client and the health of each NFS mount point.  Generally these - *  are not for detailed problem diagnosis, but simply to indicate that there - *  is a problem. - * - *  These counters are not meant to be human-readable, but are meant to be - *  integrated into system monitoring tools such as "sar" and "iostat".  As - *  such, the counters are sampled by the tools over time, and are never - *  zeroed after a file system is mounted.  Moving averages can be computed - *  by the tools by taking the difference between two instantaneous samples - *  and dividing that by the time between the samples.   */  #ifndef _NFS_IOSTAT  #define _NFS_IOSTAT -#define NFS_IOSTAT_VERS		"1.0" - -/* - * NFS byte counters - * - * 1.  SERVER - the number of payload bytes read from or written to the - *     server by the NFS client via an NFS READ or WRITE request. - * - * 2.  NORMAL - the number of bytes read or written by applications via - *     the read(2) and write(2) system call interfaces. - * - * 3.  DIRECT - the number of bytes read or written from files opened - *     with the O_DIRECT flag. - * - * These counters give a view of the data throughput into and out of the NFS - * client.  Comparing the number of bytes requested by an application with the - * number of bytes the client requests from the server can provide an - * indication of client efficiency (per-op, cache hits, etc). - * - * These counters can also help characterize which access methods are in - * use.  DIRECT by itself shows whether there is any O_DIRECT traffic. - * NORMAL + DIRECT shows how much data is going through the system call - * interface.  A large amount of SERVER traffic without much NORMAL or - * DIRECT traffic shows that applications are using mapped files. - * - * NFS page counters - * - * These count the number of pages read or written via nfs_readpage(), - * nfs_readpages(), or their write equivalents. - */ -enum nfs_stat_bytecounters { -	NFSIOS_NORMALREADBYTES = 0, -	NFSIOS_NORMALWRITTENBYTES, -	NFSIOS_DIRECTREADBYTES, -	NFSIOS_DIRECTWRITTENBYTES, -	NFSIOS_SERVERREADBYTES, -	NFSIOS_SERVERWRITTENBYTES, -	NFSIOS_READPAGES, -	NFSIOS_WRITEPAGES, -	__NFSIOS_BYTESMAX, -}; - -/* - * NFS event counters - * - * These counters provide a low-overhead way of monitoring client activity - * without enabling NFS trace debugging.  The counters show the rate at - * which VFS requests are made, and how often the client invalidates its - * data and attribute caches.  This allows system administrators to monitor - * such things as how close-to-open is working, and answer questions such - * as "why are there so many GETATTR requests on the wire?" - * - * They also count anamolous events such as short reads and writes, silly - * renames due to close-after-delete, and operations that change the size - * of a file (such operations can often be the source of data corruption - * if applications aren't using file locking properly). - */ -enum nfs_stat_eventcounters { -	NFSIOS_INODEREVALIDATE = 0, -	NFSIOS_DENTRYREVALIDATE, -	NFSIOS_DATAINVALIDATE, -	NFSIOS_ATTRINVALIDATE, -	NFSIOS_VFSOPEN, -	NFSIOS_VFSLOOKUP, -	NFSIOS_VFSACCESS, -	NFSIOS_VFSUPDATEPAGE, -	NFSIOS_VFSREADPAGE, -	NFSIOS_VFSREADPAGES, -	NFSIOS_VFSWRITEPAGE, -	NFSIOS_VFSWRITEPAGES, -	NFSIOS_VFSGETDENTS, -	NFSIOS_VFSSETATTR, -	NFSIOS_VFSFLUSH, -	NFSIOS_VFSFSYNC, -	NFSIOS_VFSLOCK, -	NFSIOS_VFSRELEASE, -	NFSIOS_CONGESTIONWAIT, -	NFSIOS_SETATTRTRUNC, -	NFSIOS_EXTENDWRITE, -	NFSIOS_SILLYRENAME, -	NFSIOS_SHORTREAD, -	NFSIOS_SHORTWRITE, -	NFSIOS_DELAY, -	__NFSIOS_COUNTSMAX, -}; - -#ifdef __KERNEL__ -  #include <linux/percpu.h>  #include <linux/cache.h> +#include <linux/nfs_iostat.h>  struct nfs_iostats {  	unsigned long long	bytes[__NFSIOS_BYTESMAX];  	unsigned long		events[__NFSIOS_COUNTSMAX];  } ____cacheline_aligned; -static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_server_stats(const struct nfs_server *server, +					enum nfs_stat_eventcounters stat)  {  	struct nfs_iostats *iostats;  	int cpu;  	cpu = get_cpu();  	iostats = per_cpu_ptr(server->io_stats, cpu); -	iostats->events[stat] ++; +	iostats->events[stat]++;  	put_cpu_no_resched();  } -static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_stats(const struct inode *inode, +				 enum nfs_stat_eventcounters stat)  {  	nfs_inc_server_stats(NFS_SERVER(inode), stat);  } -static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_server_stats(const struct nfs_server *server, +					enum nfs_stat_bytecounters stat, +					unsigned long addend)  {  	struct nfs_iostats *iostats;  	int cpu; @@ -144,7 +50,9 @@ static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat  	put_cpu_no_resched();  } -static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_stats(const struct inode *inode, +				 enum nfs_stat_bytecounters stat, +				 unsigned long addend)  {  	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);  } @@ -160,5 +68,4 @@ static inline void nfs_free_iostats(struct nfs_iostats *stats)  		free_percpu(stats);  } -#endif -#endif +#endif /* _NFS_IOSTAT */ diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9b7362565c0c..423842f51ac9 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -5,6 +5,8 @@  #include <linux/posix_acl_xattr.h>  #include <linux/nfsacl.h> +#include "internal.h" +  #define NFSDBG_FACILITY	NFSDBG_PROC  ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)  	status = nfs_revalidate_inode(server, inode);  	if (status < 0)  		return ERR_PTR(status); +	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) +		nfs_zap_acl_cache(inode);  	acl = nfs3_get_cached_acl(inode, type);  	if (acl != ERR_PTR(-EAGAIN))  		return acl; @@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,  	dprintk("NFS call setacl\n");  	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];  	status = rpc_call_sync(server->client_acl, &msg, 0); -	spin_lock(&inode->i_lock); -	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; -	spin_unlock(&inode->i_lock); +	nfs_access_zap_cache(inode); +	nfs_zap_acl_cache(inode);  	dprintk("NFS reply setacl: %d\n", status);  	/* pages may have been allocated at the xdr layer. */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c3523ad03ed1..1e750e4574a9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -129,6 +129,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  	int	status;  	dprintk("NFS call  setattr\n"); +	if (sattr->ia_valid & ATTR_FILE) +		msg.rpc_cred = nfs_file_cred(sattr->ia_file);  	nfs_fattr_init(fattr);  	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);  	if (status == 0) @@ -248,6 +250,53 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,  	return status;  } +struct nfs3_createdata { +	struct rpc_message msg; +	union { +		struct nfs3_createargs create; +		struct nfs3_mkdirargs mkdir; +		struct nfs3_symlinkargs symlink; +		struct nfs3_mknodargs mknod; +	} arg; +	struct nfs3_diropres res; +	struct nfs_fh fh; +	struct nfs_fattr fattr; +	struct nfs_fattr dir_attr; +}; + +static struct nfs3_createdata *nfs3_alloc_createdata(void) +{ +	struct nfs3_createdata *data; + +	data = kzalloc(sizeof(*data), GFP_KERNEL); +	if (data != NULL) { +		data->msg.rpc_argp = &data->arg; +		data->msg.rpc_resp = &data->res; +		data->res.fh = &data->fh; +		data->res.fattr = &data->fattr; +		data->res.dir_attr = &data->dir_attr; +		nfs_fattr_init(data->res.fattr); +		nfs_fattr_init(data->res.dir_attr); +	} +	return data; +} + +static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data) +{ +	int status; + +	status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); +	nfs_post_op_update_inode(dir, data->res.dir_attr); +	if (status == 0) +		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); +	return status; +} + +static void nfs3_free_createdata(struct nfs3_createdata *data) +{ +	kfree(data); +} +  /*   * Create a regular file.   * For now, we don't implement O_EXCL. @@ -256,70 +305,60 @@ static int  nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,  		 int flags, struct nameidata *nd)  { -	struct nfs_fh		fhandle; -	struct nfs_fattr	fattr; -	struct nfs_fattr	dir_attr; -	struct nfs3_createargs	arg = { -		.fh		= NFS_FH(dir), -		.name		= dentry->d_name.name, -		.len		= dentry->d_name.len, -		.sattr		= sattr, -	}; -	struct nfs3_diropres	res = { -		.dir_attr	= &dir_attr, -		.fh		= &fhandle, -		.fattr		= &fattr -	}; -	struct rpc_message msg = { -		.rpc_proc	= &nfs3_procedures[NFS3PROC_CREATE], -		.rpc_argp	= &arg, -		.rpc_resp	= &res, -	}; +	struct nfs3_createdata *data;  	mode_t mode = sattr->ia_mode; -	int status; +	int status = -ENOMEM;  	dprintk("NFS call  create %s\n", dentry->d_name.name); -	arg.createmode = NFS3_CREATE_UNCHECKED; + +	data = nfs3_alloc_createdata(); +	if (data == NULL) +		goto out; + +	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE]; +	data->arg.create.fh = NFS_FH(dir); +	data->arg.create.name = dentry->d_name.name; +	data->arg.create.len = dentry->d_name.len; +	data->arg.create.sattr = sattr; + +	data->arg.create.createmode = NFS3_CREATE_UNCHECKED;  	if (flags & O_EXCL) { -		arg.createmode  = NFS3_CREATE_EXCLUSIVE; -		arg.verifier[0] = jiffies; -		arg.verifier[1] = current->pid; +		data->arg.create.createmode  = NFS3_CREATE_EXCLUSIVE; +		data->arg.create.verifier[0] = jiffies; +		data->arg.create.verifier[1] = current->pid;  	}  	sattr->ia_mode &= ~current->fs->umask; -again: -	nfs_fattr_init(&dir_attr); -	nfs_fattr_init(&fattr); -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	nfs_refresh_inode(dir, &dir_attr); +	for (;;) { +		status = nfs3_do_create(dir, dentry, data); -	/* If the server doesn't support the exclusive creation semantics, -	 * try again with simple 'guarded' mode. */ -	if (status == -ENOTSUPP) { -		switch (arg.createmode) { +		if (status != -ENOTSUPP) +			break; +		/* If the server doesn't support the exclusive creation +		 * semantics, try again with simple 'guarded' mode. */ +		switch (data->arg.create.createmode) {  			case NFS3_CREATE_EXCLUSIVE: -				arg.createmode = NFS3_CREATE_GUARDED; +				data->arg.create.createmode = NFS3_CREATE_GUARDED;  				break;  			case NFS3_CREATE_GUARDED: -				arg.createmode = NFS3_CREATE_UNCHECKED; +				data->arg.create.createmode = NFS3_CREATE_UNCHECKED;  				break;  			case NFS3_CREATE_UNCHECKED:  				goto out;  		} -		goto again; +		nfs_fattr_init(data->res.dir_attr); +		nfs_fattr_init(data->res.fattr);  	} -	if (status == 0) -		status = nfs_instantiate(dentry, &fhandle, &fattr);  	if (status != 0)  		goto out;  	/* When we created the file with exclusive semantics, make  	 * sure we set the attributes afterwards. */ -	if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { +	if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {  		dprintk("NFS call  setattr (post-create)\n");  		if (!(sattr->ia_valid & ATTR_ATIME_SET)) @@ -330,14 +369,15 @@ again:  		/* Note: we could use a guarded setattr here, but I'm  		 * not sure this buys us anything (and I'd have  		 * to revamp the NFSv3 XDR code) */ -		status = nfs3_proc_setattr(dentry, &fattr, sattr); -		nfs_post_op_update_inode(dentry->d_inode, &fattr); +		status = nfs3_proc_setattr(dentry, data->res.fattr, sattr); +		nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);  		dprintk("NFS reply setattr (post-create): %d\n", status); +		if (status != 0) +			goto out;  	} -	if (status != 0) -		goto out;  	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);  out: +	nfs3_free_createdata(data);  	dprintk("NFS reply create: %d\n", status);  	return status;  } @@ -452,40 +492,28 @@ static int  nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,  		  unsigned int len, struct iattr *sattr)  { -	struct nfs_fh fhandle; -	struct nfs_fattr fattr, dir_attr; -	struct nfs3_symlinkargs	arg = { -		.fromfh		= NFS_FH(dir), -		.fromname	= dentry->d_name.name, -		.fromlen	= dentry->d_name.len, -		.pages		= &page, -		.pathlen	= len, -		.sattr		= sattr -	}; -	struct nfs3_diropres	res = { -		.dir_attr	= &dir_attr, -		.fh		= &fhandle, -		.fattr		= &fattr -	}; -	struct rpc_message msg = { -		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK], -		.rpc_argp	= &arg, -		.rpc_resp	= &res, -	}; -	int			status; +	struct nfs3_createdata *data; +	int status = -ENOMEM;  	if (len > NFS3_MAXPATHLEN)  		return -ENAMETOOLONG;  	dprintk("NFS call  symlink %s\n", dentry->d_name.name); -	nfs_fattr_init(&dir_attr); -	nfs_fattr_init(&fattr); -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	nfs_post_op_update_inode(dir, &dir_attr); -	if (status != 0) +	data = nfs3_alloc_createdata(); +	if (data == NULL)  		goto out; -	status = nfs_instantiate(dentry, &fhandle, &fattr); +	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK]; +	data->arg.symlink.fromfh = NFS_FH(dir); +	data->arg.symlink.fromname = dentry->d_name.name; +	data->arg.symlink.fromlen = dentry->d_name.len; +	data->arg.symlink.pages = &page; +	data->arg.symlink.pathlen = len; +	data->arg.symlink.sattr = sattr; + +	status = nfs3_do_create(dir, dentry, data); + +	nfs3_free_createdata(data);  out:  	dprintk("NFS reply symlink: %d\n", status);  	return status; @@ -494,42 +522,31 @@ out:  static int  nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)  { -	struct nfs_fh fhandle; -	struct nfs_fattr fattr, dir_attr; -	struct nfs3_mkdirargs	arg = { -		.fh		= NFS_FH(dir), -		.name		= dentry->d_name.name, -		.len		= dentry->d_name.len, -		.sattr		= sattr -	}; -	struct nfs3_diropres	res = { -		.dir_attr	= &dir_attr, -		.fh		= &fhandle, -		.fattr		= &fattr -	}; -	struct rpc_message msg = { -		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKDIR], -		.rpc_argp	= &arg, -		.rpc_resp	= &res, -	}; +	struct nfs3_createdata *data;  	int mode = sattr->ia_mode; -	int status; +	int status = -ENOMEM;  	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);  	sattr->ia_mode &= ~current->fs->umask; -	nfs_fattr_init(&dir_attr); -	nfs_fattr_init(&fattr); -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	nfs_post_op_update_inode(dir, &dir_attr); -	if (status != 0) +	data = nfs3_alloc_createdata(); +	if (data == NULL)  		goto out; -	status = nfs_instantiate(dentry, &fhandle, &fattr); + +	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; +	data->arg.mkdir.fh = NFS_FH(dir); +	data->arg.mkdir.name = dentry->d_name.name; +	data->arg.mkdir.len = dentry->d_name.len; +	data->arg.mkdir.sattr = sattr; + +	status = nfs3_do_create(dir, dentry, data);  	if (status != 0)  		goto out; +  	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);  out: +	nfs3_free_createdata(data);  	dprintk("NFS reply mkdir: %d\n", status);  	return status;  } @@ -615,52 +632,50 @@ static int  nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,  		dev_t rdev)  { -	struct nfs_fh fh; -	struct nfs_fattr fattr, dir_attr; -	struct nfs3_mknodargs	arg = { -		.fh		= NFS_FH(dir), -		.name		= dentry->d_name.name, -		.len		= dentry->d_name.len, -		.sattr		= sattr, -		.rdev		= rdev -	}; -	struct nfs3_diropres	res = { -		.dir_attr	= &dir_attr, -		.fh		= &fh, -		.fattr		= &fattr -	}; -	struct rpc_message msg = { -		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKNOD], -		.rpc_argp	= &arg, -		.rpc_resp	= &res, -	}; +	struct nfs3_createdata *data;  	mode_t mode = sattr->ia_mode; -	int status; - -	switch (sattr->ia_mode & S_IFMT) { -	case S_IFBLK:	arg.type = NF3BLK;  break; -	case S_IFCHR:	arg.type = NF3CHR;  break; -	case S_IFIFO:	arg.type = NF3FIFO; break; -	case S_IFSOCK:	arg.type = NF3SOCK; break; -	default:	return -EINVAL; -	} +	int status = -ENOMEM;  	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,  			MAJOR(rdev), MINOR(rdev));  	sattr->ia_mode &= ~current->fs->umask; -	nfs_fattr_init(&dir_attr); -	nfs_fattr_init(&fattr); -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	nfs_post_op_update_inode(dir, &dir_attr); -	if (status != 0) +	data = nfs3_alloc_createdata(); +	if (data == NULL)  		goto out; -	status = nfs_instantiate(dentry, &fh, &fattr); + +	data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; +	data->arg.mknod.fh = NFS_FH(dir); +	data->arg.mknod.name = dentry->d_name.name; +	data->arg.mknod.len = dentry->d_name.len; +	data->arg.mknod.sattr = sattr; +	data->arg.mknod.rdev = rdev; + +	switch (sattr->ia_mode & S_IFMT) { +	case S_IFBLK: +		data->arg.mknod.type = NF3BLK; +		break; +	case S_IFCHR: +		data->arg.mknod.type = NF3CHR; +		break; +	case S_IFIFO: +		data->arg.mknod.type = NF3FIFO; +		break; +	case S_IFSOCK: +		data->arg.mknod.type = NF3SOCK; +		break; +	default: +		status = -EINVAL; +		goto out; +	} + +	status = nfs3_do_create(dir, dentry, data);  	if (status != 0)  		goto out;  	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);  out: +	nfs3_free_createdata(data);  	dprintk("NFS reply mknod: %d\n", status);  	return status;  } @@ -801,8 +816,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {  	.write_done	= nfs3_write_done,  	.commit_setup	= nfs3_proc_commit_setup,  	.commit_done	= nfs3_commit_done, -	.file_open	= nfs_open, -	.file_release	= nfs_release,  	.lock		= nfs3_proc_lock,  	.clear_acl_cache = nfs3_forget_cached_acls,  }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1293e0acd82b..4451287a81d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1139,8 +1139,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int  	return res;  } -static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, -                struct iattr *sattr, struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +			    struct nfs_fattr *fattr, struct iattr *sattr, +			    struct nfs4_state *state)  {  	struct nfs_server *server = NFS_SERVER(inode);          struct nfs_setattrargs  arg = { @@ -1154,9 +1155,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,  		.server		= server,          };          struct rpc_message msg = { -                .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], -                .rpc_argp       = &arg, -                .rpc_resp       = &res, +		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETATTR], +		.rpc_argp	= &arg, +		.rpc_resp	= &res, +		.rpc_cred	= cred,          };  	unsigned long timestamp = jiffies;  	int status; @@ -1166,7 +1168,6 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,  	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {  		/* Use that stateid */  	} else if (state != NULL) { -		msg.rpc_cred = state->owner->so_cred;  		nfs4_copy_stateid(&arg.stateid, state, current->files);  	} else  		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); @@ -1177,15 +1178,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,  	return status;  } -static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, -                struct iattr *sattr, struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +			   struct nfs_fattr *fattr, struct iattr *sattr, +			   struct nfs4_state *state)  {  	struct nfs_server *server = NFS_SERVER(inode);  	struct nfs4_exception exception = { };  	int err;  	do {  		err = nfs4_handle_exception(server, -				_nfs4_do_setattr(inode, fattr, sattr, state), +				_nfs4_do_setattr(inode, cred, fattr, sattr, state),  				&exception);  	} while (exception.retry);  	return err; @@ -1647,29 +1649,25 @@ static int  nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  		  struct iattr *sattr)  { -	struct rpc_cred *cred;  	struct inode *inode = dentry->d_inode; -	struct nfs_open_context *ctx; +	struct rpc_cred *cred = NULL;  	struct nfs4_state *state = NULL;  	int status;  	nfs_fattr_init(fattr); -	cred = rpc_lookup_cred(); -	if (IS_ERR(cred)) -		return PTR_ERR(cred); -  	/* Search for an existing open(O_WRITE) file */ -	ctx = nfs_find_open_context(inode, cred, FMODE_WRITE); -	if (ctx != NULL) +	if (sattr->ia_valid & ATTR_FILE) { +		struct nfs_open_context *ctx; + +		ctx = nfs_file_open_context(sattr->ia_file); +		cred = ctx->cred;  		state = ctx->state; +	} -	status = nfs4_do_setattr(inode, fattr, sattr, state); +	status = nfs4_do_setattr(inode, cred, fattr, sattr, state);  	if (status == 0)  		nfs_setattr_update_inode(inode, sattr); -	if (ctx != NULL) -		put_nfs_open_context(ctx); -	put_rpccred(cred);  	return status;  } @@ -1897,17 +1895,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,  		goto out;  	}  	state = nfs4_do_open(dir, &path, flags, sattr, cred); -	put_rpccred(cred);  	d_drop(dentry);  	if (IS_ERR(state)) {  		status = PTR_ERR(state); -		goto out; +		goto out_putcred;  	}  	d_add(dentry, igrab(state->inode));  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));  	if (flags & O_EXCL) {  		struct nfs_fattr fattr; -		status = nfs4_do_setattr(state->inode, &fattr, sattr, state); +		status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);  		if (status == 0)  			nfs_setattr_update_inode(state->inode, sattr);  		nfs_post_op_update_inode(state->inode, &fattr); @@ -1916,6 +1913,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,  		status = nfs4_intent_set_file(nd, &path, state);  	else  		nfs4_close_sync(&path, state, flags); +out_putcred: +	put_rpccred(cred);  out:  	return status;  } @@ -2079,47 +2078,81 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n  	return err;  } +struct nfs4_createdata { +	struct rpc_message msg; +	struct nfs4_create_arg arg; +	struct nfs4_create_res res; +	struct nfs_fh fh; +	struct nfs_fattr fattr; +	struct nfs_fattr dir_fattr; +}; + +static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, +		struct qstr *name, struct iattr *sattr, u32 ftype) +{ +	struct nfs4_createdata *data; + +	data = kzalloc(sizeof(*data), GFP_KERNEL); +	if (data != NULL) { +		struct nfs_server *server = NFS_SERVER(dir); + +		data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; +		data->msg.rpc_argp = &data->arg; +		data->msg.rpc_resp = &data->res; +		data->arg.dir_fh = NFS_FH(dir); +		data->arg.server = server; +		data->arg.name = name; +		data->arg.attrs = sattr; +		data->arg.ftype = ftype; +		data->arg.bitmask = server->attr_bitmask; +		data->res.server = server; +		data->res.fh = &data->fh; +		data->res.fattr = &data->fattr; +		data->res.dir_fattr = &data->dir_fattr; +		nfs_fattr_init(data->res.fattr); +		nfs_fattr_init(data->res.dir_fattr); +	} +	return data; +} + +static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) +{ +	int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); +	if (status == 0) { +		update_changeattr(dir, &data->res.dir_cinfo); +		nfs_post_op_update_inode(dir, data->res.dir_fattr); +		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); +	} +	return status; +} + +static void nfs4_free_createdata(struct nfs4_createdata *data) +{ +	kfree(data); +} +  static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,  		struct page *page, unsigned int len, struct iattr *sattr)  { -	struct nfs_server *server = NFS_SERVER(dir); -	struct nfs_fh fhandle; -	struct nfs_fattr fattr, dir_fattr; -	struct nfs4_create_arg arg = { -		.dir_fh = NFS_FH(dir), -		.server = server, -		.name = &dentry->d_name, -		.attrs = sattr, -		.ftype = NF4LNK, -		.bitmask = server->attr_bitmask, -	}; -	struct nfs4_create_res res = { -		.server = server, -		.fh = &fhandle, -		.fattr = &fattr, -		.dir_fattr = &dir_fattr, -	}; -	struct rpc_message msg = { -		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], -		.rpc_argp = &arg, -		.rpc_resp = &res, -	}; -	int			status; +	struct nfs4_createdata *data; +	int status = -ENAMETOOLONG;  	if (len > NFS4_MAXPATHLEN) -		return -ENAMETOOLONG; +		goto out; -	arg.u.symlink.pages = &page; -	arg.u.symlink.len = len; -	nfs_fattr_init(&fattr); -	nfs_fattr_init(&dir_fattr); +	status = -ENOMEM; +	data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK); +	if (data == NULL) +		goto out; + +	data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; +	data->arg.u.symlink.pages = &page; +	data->arg.u.symlink.len = len; -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	if (!status) { -		update_changeattr(dir, &res.dir_cinfo); -		nfs_post_op_update_inode(dir, res.dir_fattr); -		status = nfs_instantiate(dentry, &fhandle, &fattr); -	} +	status = nfs4_do_create(dir, dentry, data); + +	nfs4_free_createdata(data); +out:  	return status;  } @@ -2140,39 +2173,17 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,  static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,  		struct iattr *sattr)  { -	struct nfs_server *server = NFS_SERVER(dir); -	struct nfs_fh fhandle; -	struct nfs_fattr fattr, dir_fattr; -	struct nfs4_create_arg arg = { -		.dir_fh = NFS_FH(dir), -		.server = server, -		.name = &dentry->d_name, -		.attrs = sattr, -		.ftype = NF4DIR, -		.bitmask = server->attr_bitmask, -	}; -	struct nfs4_create_res res = { -		.server = server, -		.fh = &fhandle, -		.fattr = &fattr, -		.dir_fattr = &dir_fattr, -	}; -	struct rpc_message msg = { -		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], -		.rpc_argp = &arg, -		.rpc_resp = &res, -	}; -	int			status; +	struct nfs4_createdata *data; +	int status = -ENOMEM; -	nfs_fattr_init(&fattr); -	nfs_fattr_init(&dir_fattr); -	 -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	if (!status) { -		update_changeattr(dir, &res.dir_cinfo); -		nfs_post_op_update_inode(dir, res.dir_fattr); -		status = nfs_instantiate(dentry, &fhandle, &fattr); -	} +	data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); +	if (data == NULL) +		goto out; + +	status = nfs4_do_create(dir, dentry, data); + +	nfs4_free_createdata(data); +out:  	return status;  } @@ -2242,56 +2253,34 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,  static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,  		struct iattr *sattr, dev_t rdev)  { -	struct nfs_server *server = NFS_SERVER(dir); -	struct nfs_fh fh; -	struct nfs_fattr fattr, dir_fattr; -	struct nfs4_create_arg arg = { -		.dir_fh = NFS_FH(dir), -		.server = server, -		.name = &dentry->d_name, -		.attrs = sattr, -		.bitmask = server->attr_bitmask, -	}; -	struct nfs4_create_res res = { -		.server = server, -		.fh = &fh, -		.fattr = &fattr, -		.dir_fattr = &dir_fattr, -	}; -	struct rpc_message msg = { -		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], -		.rpc_argp = &arg, -		.rpc_resp = &res, -	}; -	int			status; -	int                     mode = sattr->ia_mode; - -	nfs_fattr_init(&fattr); -	nfs_fattr_init(&dir_fattr); +	struct nfs4_createdata *data; +	int mode = sattr->ia_mode; +	int status = -ENOMEM;  	BUG_ON(!(sattr->ia_valid & ATTR_MODE));  	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + +	data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); +	if (data == NULL) +		goto out; +  	if (S_ISFIFO(mode)) -		arg.ftype = NF4FIFO; +		data->arg.ftype = NF4FIFO;  	else if (S_ISBLK(mode)) { -		arg.ftype = NF4BLK; -		arg.u.device.specdata1 = MAJOR(rdev); -		arg.u.device.specdata2 = MINOR(rdev); +		data->arg.ftype = NF4BLK; +		data->arg.u.device.specdata1 = MAJOR(rdev); +		data->arg.u.device.specdata2 = MINOR(rdev);  	}  	else if (S_ISCHR(mode)) { -		arg.ftype = NF4CHR; -		arg.u.device.specdata1 = MAJOR(rdev); -		arg.u.device.specdata2 = MINOR(rdev); +		data->arg.ftype = NF4CHR; +		data->arg.u.device.specdata1 = MAJOR(rdev); +		data->arg.u.device.specdata2 = MINOR(rdev);  	} -	else -		arg.ftype = NF4SOCK; -	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); -	if (status == 0) { -		update_changeattr(dir, &res.dir_cinfo); -		nfs_post_op_update_inode(dir, res.dir_fattr); -		status = nfs_instantiate(dentry, &fh, &fattr); -	} +	status = nfs4_do_create(dir, dentry, data); + +	nfs4_free_createdata(data); +out:  	return status;  } @@ -2706,6 +2695,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)  	ret = nfs_revalidate_inode(server, inode);  	if (ret < 0)  		return ret; +	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) +		nfs_zap_acl_cache(inode);  	ret = nfs4_read_cached_acl(inode, buf, buflen);  	if (ret != -ENOENT)  		return ret; @@ -2733,7 +2724,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl  	nfs_inode_return_delegation(inode);  	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);  	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); -	nfs_zap_caches(inode); +	nfs_access_zap_cache(inode); +	nfs_zap_acl_cache(inode);  	return ret;  } @@ -2767,8 +2759,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)  			task->tk_status = 0;  			return -EAGAIN;  		case -NFS4ERR_DELAY: -			nfs_inc_server_stats((struct nfs_server *) server, -						NFSIOS_DELAY); +			nfs_inc_server_stats(server, NFSIOS_DELAY);  		case -NFS4ERR_GRACE:  			rpc_delay(task, NFS4_POLL_RETRY_MAX);  			task->tk_status = 0; @@ -2933,7 +2924,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre  int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)  { -	long timeout; +	long timeout = 0;  	int err;  	do {  		err = _nfs4_proc_setclientid_confirm(clp, cred); @@ -3725,8 +3716,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {  	.write_done	= nfs4_write_done,  	.commit_setup	= nfs4_proc_commit_setup,  	.commit_done	= nfs4_commit_done, -	.file_open      = nfs_open, -	.file_release   = nfs_release,  	.lock		= nfs4_proc_lock,  	.clear_acl_cache = nfs4_zap_acl_attr,  }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 531379d36823..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -1,6 +1,4 @@  /* - *  $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ - *   *  Copyright (C) 1995, 1996  Gero Kuhlmann <[email protected]>   *   *  Allow an NFS filesystem to be mounted as root. The way this works is: @@ -297,10 +295,10 @@ static int __init root_nfs_name(char *name)  	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */  	nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;  	nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE; -	nfs_data.acregmin = 3; -	nfs_data.acregmax = 60; -	nfs_data.acdirmin = 30; -	nfs_data.acdirmax = 60; +	nfs_data.acregmin = NFS_DEF_ACREGMIN; +	nfs_data.acregmax = NFS_DEF_ACREGMAX; +	nfs_data.acdirmin = NFS_DEF_ACDIRMIN; +	nfs_data.acdirmax = NFS_DEF_ACDIRMAX;  	strcpy(buf, NFS_ROOT);  	/* Process options received from the remote server */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 03599bfe81cf..4dbb84df1b68 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -129,6 +129,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  	sattr->ia_mode &= S_IALLUGO;  	dprintk("NFS call  setattr\n"); +	if (sattr->ia_valid & ATTR_FILE) +		msg.rpc_cred = nfs_file_cred(sattr->ia_file);  	nfs_fattr_init(fattr);  	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);  	if (status == 0) @@ -598,6 +600,29 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)  	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);  } +/* Helper functions for NFS lock bounds checking */ +#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL) +static int nfs_lock_check_bounds(const struct file_lock *fl) +{ +	__s32 start, end; + +	start = (__s32)fl->fl_start; +	if ((loff_t)start != fl->fl_start) +		goto out_einval; + +	if (fl->fl_end != OFFSET_MAX) { +		end = (__s32)fl->fl_end; +		if ((loff_t)end != fl->fl_end) +			goto out_einval; +	} else +		end = NFS_LOCK32_OFFSET_MAX; + +	if (start < 0 || start > end) +		goto out_einval; +	return 0; +out_einval: +	return -EINVAL; +}  const struct nfs_rpc_ops nfs_v2_clientops = {  	.version	= 2,		       /* protocol version */ @@ -630,7 +655,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {  	.write_setup	= nfs_proc_write_setup,  	.write_done	= nfs_write_done,  	.commit_setup	= nfs_proc_commit_setup, -	.file_open	= nfs_open, -	.file_release	= nfs_release,  	.lock		= nfs_proc_lock, +	.lock_check_bounds = nfs_lock_check_bounds,  }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 614efeed5437..47cf83e917be 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@  #include <linux/inet.h>  #include <linux/in6.h>  #include <net/ipv6.h> +#include <linux/netdevice.h>  #include <linux/nfs_xdr.h>  #include <linux/magic.h>  #include <linux/parser.h> @@ -65,7 +66,6 @@  enum {  	/* Mount options that take no arguments */  	Opt_soft, Opt_hard, -	Opt_intr, Opt_nointr,  	Opt_posix, Opt_noposix,  	Opt_cto, Opt_nocto,  	Opt_ac, Opt_noac, @@ -92,8 +92,8 @@ enum {  	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,  	Opt_addr, Opt_mountaddr, Opt_clientaddr, -	/* Mount options that are ignored */ -	Opt_userspace, Opt_deprecated, +	/* Special mount options */ +	Opt_userspace, Opt_deprecated, Opt_sloppy,  	Opt_err  }; @@ -101,10 +101,14 @@ enum {  static match_table_t nfs_mount_option_tokens = {  	{ Opt_userspace, "bg" },  	{ Opt_userspace, "fg" }, +	{ Opt_userspace, "retry=%s" }, + +	{ Opt_sloppy, "sloppy" }, +  	{ Opt_soft, "soft" },  	{ Opt_hard, "hard" }, -	{ Opt_intr, "intr" }, -	{ Opt_nointr, "nointr" }, +	{ Opt_deprecated, "intr" }, +	{ Opt_deprecated, "nointr" },  	{ Opt_posix, "posix" },  	{ Opt_noposix, "noposix" },  	{ Opt_cto, "cto" }, @@ -136,7 +140,6 @@ static match_table_t nfs_mount_option_tokens = {  	{ Opt_acdirmin, "acdirmin=%u" },  	{ Opt_acdirmax, "acdirmax=%u" },  	{ Opt_actimeo, "actimeo=%u" }, -	{ Opt_userspace, "retry=%u" },  	{ Opt_namelen, "namlen=%u" },  	{ Opt_mountport, "mountport=%u" },  	{ Opt_mountvers, "mountvers=%u" }, @@ -207,6 +210,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type,  		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);  static void nfs_kill_super(struct super_block *);  static void nfs_put_super(struct super_block *); +static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);  static struct file_system_type nfs_fs_type = {  	.owner		= THIS_MODULE, @@ -234,6 +238,7 @@ static const struct super_operations nfs_sops = {  	.umount_begin	= nfs_umount_begin,  	.show_options	= nfs_show_options,  	.show_stats	= nfs_show_stats, +	.remount_fs	= nfs_remount,  };  #ifdef CONFIG_NFS_V4 @@ -278,6 +283,7 @@ static const struct super_operations nfs4_sops = {  	.umount_begin	= nfs_umount_begin,  	.show_options	= nfs_show_options,  	.show_stats	= nfs_show_stats, +	.remount_fs	= nfs_remount,  };  #endif @@ -514,13 +520,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,  	if (nfss->bsize != 0)  		seq_printf(m, ",bsize=%u", nfss->bsize);  	seq_printf(m, ",namlen=%u", nfss->namelen); -	if (nfss->acregmin != 3*HZ || showdefaults) +	if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults)  		seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); -	if (nfss->acregmax != 60*HZ || showdefaults) +	if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults)  		seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); -	if (nfss->acdirmin != 30*HZ || showdefaults) +	if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults)  		seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); -	if (nfss->acdirmax != 60*HZ || showdefaults) +	if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)  		seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);  	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {  		if (nfss->flags & nfs_infop->flag) @@ -702,49 +708,233 @@ static int nfs_verify_server_address(struct sockaddr *addr)  	return 0;  } +static void nfs_parse_ipv4_address(char *string, size_t str_len, +				   struct sockaddr *sap, size_t *addr_len) +{ +	struct sockaddr_in *sin = (struct sockaddr_in *)sap; +	u8 *addr = (u8 *)&sin->sin_addr.s_addr; + +	if (str_len <= INET_ADDRSTRLEN) { +		dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", +				(int)str_len, string); + +		sin->sin_family = AF_INET; +		*addr_len = sizeof(*sin); +		if (in4_pton(string, str_len, addr, '\0', NULL)) +			return; +	} + +	sap->sa_family = AF_UNSPEC; +	*addr_len = 0; +} + +#define IPV6_SCOPE_DELIMITER	'%' + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, +				    const char *delim, +				    struct sockaddr_in6 *sin6) +{ +	char *p; +	size_t len; + +	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) +		return ; +	if (*delim != IPV6_SCOPE_DELIMITER) +		return; + +	len = (string + str_len) - delim - 1; +	p = kstrndup(delim + 1, len, GFP_KERNEL); +	if (p) { +		unsigned long scope_id = 0; +		struct net_device *dev; + +		dev = dev_get_by_name(&init_net, p); +		if (dev != NULL) { +			scope_id = dev->ifindex; +			dev_put(dev); +		} else { +			/* scope_id is set to zero on error */ +			strict_strtoul(p, 10, &scope_id); +		} + +		kfree(p); +		sin6->sin6_scope_id = scope_id; +		dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); +	} +} + +static void nfs_parse_ipv6_address(char *string, size_t str_len, +				   struct sockaddr *sap, size_t *addr_len) +{ +	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; +	u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; +	const char *delim; + +	if (str_len <= INET6_ADDRSTRLEN) { +		dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", +				(int)str_len, string); + +		sin6->sin6_family = AF_INET6; +		*addr_len = sizeof(*sin6); +		if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { +			nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); +			return; +		} +	} + +	sap->sa_family = AF_UNSPEC; +	*addr_len = 0; +} +#else +static void nfs_parse_ipv6_address(char *string, size_t str_len, +				   struct sockaddr *sap, size_t *addr_len) +{ +	sap->sa_family = AF_UNSPEC; +	*addr_len = 0; +} +#endif +  /* - * Parse string addresses passed in via a mount option, - * and construct a sockaddr based on the result. + * Construct a sockaddr based on the contents of a string that contains + * an IP address in presentation format.   * - * If address parsing fails, set the sockaddr's address - * family to AF_UNSPEC to force nfs_verify_server_address() - * to punt the mount. + * If there is a problem constructing the new sockaddr, set the address + * family to AF_UNSPEC.   */ -static void nfs_parse_server_address(char *value, -				     struct sockaddr *sap, -				     size_t *len) +static void nfs_parse_ip_address(char *string, size_t str_len, +				 struct sockaddr *sap, size_t *addr_len)  { -	if (strchr(value, ':')) { -		struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; -		u8 *addr = (u8 *)&ap->sin6_addr.in6_u; +	unsigned int i, colons; -		ap->sin6_family = AF_INET6; -		*len = sizeof(*ap); -		if (in6_pton(value, -1, addr, '\0', NULL)) -			return; -	} else { -		struct sockaddr_in *ap = (struct sockaddr_in *)sap; -		u8 *addr = (u8 *)&ap->sin_addr.s_addr; +	colons = 0; +	for (i = 0; i < str_len; i++) +		if (string[i] == ':') +			colons++; -		ap->sin_family = AF_INET; -		*len = sizeof(*ap); -		if (in4_pton(value, -1, addr, '\0', NULL)) +	if (colons >= 2) +		nfs_parse_ipv6_address(string, str_len, sap, addr_len); +	else +		nfs_parse_ipv4_address(string, str_len, sap, addr_len); +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ +	switch (mnt->nfs_server.protocol) { +	case XPRT_TRANSPORT_UDP: +	case XPRT_TRANSPORT_TCP: +	case XPRT_TRANSPORT_RDMA: +		break; +	default: +		mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; +	} +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ +	nfs_validate_transport_protocol(mnt); + +	if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || +	    mnt->mount_server.protocol == XPRT_TRANSPORT_TCP)  			return; +	switch (mnt->nfs_server.protocol) { +	case XPRT_TRANSPORT_UDP: +		mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; +		break; +	case XPRT_TRANSPORT_TCP: +	case XPRT_TRANSPORT_RDMA: +		mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;  	} +} -	sap->sa_family = AF_UNSPEC; -	*len = 0; +/* + * Parse the value of the 'sec=' option. + * + * The flavor_len setting is for v4 mounts. + */ +static int nfs_parse_security_flavors(char *value, +				      struct nfs_parsed_mount_data *mnt) +{ +	substring_t args[MAX_OPT_ARGS]; + +	dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + +	switch (match_token(value, nfs_secflavor_tokens, args)) { +	case Opt_sec_none: +		mnt->auth_flavor_len = 0; +		mnt->auth_flavors[0] = RPC_AUTH_NULL; +		break; +	case Opt_sec_sys: +		mnt->auth_flavor_len = 0; +		mnt->auth_flavors[0] = RPC_AUTH_UNIX; +		break; +	case Opt_sec_krb5: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; +		break; +	case Opt_sec_krb5i: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; +		break; +	case Opt_sec_krb5p: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; +		break; +	case Opt_sec_lkey: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; +		break; +	case Opt_sec_lkeyi: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; +		break; +	case Opt_sec_lkeyp: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; +		break; +	case Opt_sec_spkm: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; +		break; +	case Opt_sec_spkmi: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; +		break; +	case Opt_sec_spkmp: +		mnt->auth_flavor_len = 1; +		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; +		break; +	default: +		return 0; +	} + +	return 1; +} + +static void nfs_parse_invalid_value(const char *option) +{ +	dfprintk(MOUNT, "NFS:   bad value specified for %s option\n", option);  }  /*   * Error-check and convert a string of mount options from user space into - * a data structure + * a data structure.  The whole mount string is processed; bad options are + * skipped as they are encountered.  If there were no errors, return 1; + * otherwise return 0 (zero).   */  static int nfs_parse_mount_options(char *raw,  				   struct nfs_parsed_mount_data *mnt)  {  	char *p, *string, *secdata; -	int rc; +	int rc, sloppy = 0, errors = 0;  	if (!raw) {  		dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -777,15 +967,16 @@ static int nfs_parse_mount_options(char *raw,  		token = match_token(p, nfs_mount_option_tokens, args);  		switch (token) { + +		/* +		 * boolean options:  foo/nofoo +		 */  		case Opt_soft:  			mnt->flags |= NFS_MOUNT_SOFT;  			break;  		case Opt_hard:  			mnt->flags &= ~NFS_MOUNT_SOFT;  			break; -		case Opt_intr: -		case Opt_nointr: -			break;  		case Opt_posix:  			mnt->flags |= NFS_MOUNT_POSIX;  			break; @@ -819,20 +1010,14 @@ static int nfs_parse_mount_options(char *raw,  		case Opt_udp:  			mnt->flags &= ~NFS_MOUNT_TCP;  			mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; -			mnt->timeo = 7; -			mnt->retrans = 5;  			break;  		case Opt_tcp:  			mnt->flags |= NFS_MOUNT_TCP;  			mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; -			mnt->timeo = 600; -			mnt->retrans = 2;  			break;  		case Opt_rdma:  			mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */  			mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; -			mnt->timeo = 600; -			mnt->retrans = 2;  			break;  		case Opt_acl:  			mnt->flags &= ~NFS_MOUNT_NOACL; @@ -853,165 +1038,144 @@ static int nfs_parse_mount_options(char *raw,  			mnt->flags |= NFS_MOUNT_UNSHARED;  			break; +		/* +		 * options that take numeric values +		 */  		case Opt_port: -			if (match_int(args, &option)) -				return 0; -			if (option < 0 || option > 65535) -				return 0; -			mnt->nfs_server.port = option; +			if (match_int(args, &option) || +			    option < 0 || option > USHORT_MAX) { +				errors++; +				nfs_parse_invalid_value("port"); +			} else +				mnt->nfs_server.port = option;  			break;  		case Opt_rsize: -			if (match_int(args, &mnt->rsize)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("rsize"); +			} else +				mnt->rsize = option;  			break;  		case Opt_wsize: -			if (match_int(args, &mnt->wsize)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("wsize"); +			} else +				mnt->wsize = option;  			break;  		case Opt_bsize: -			if (match_int(args, &option)) -				return 0; -			if (option < 0) -				return 0; -			mnt->bsize = option; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("bsize"); +			} else +				mnt->bsize = option;  			break;  		case Opt_timeo: -			if (match_int(args, &mnt->timeo)) -				return 0; +			if (match_int(args, &option) || option <= 0) { +				errors++; +				nfs_parse_invalid_value("timeo"); +			} else +				mnt->timeo = option;  			break;  		case Opt_retrans: -			if (match_int(args, &mnt->retrans)) -				return 0; +			if (match_int(args, &option) || option <= 0) { +				errors++; +				nfs_parse_invalid_value("retrans"); +			} else +				mnt->retrans = option;  			break;  		case Opt_acregmin: -			if (match_int(args, &mnt->acregmin)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("acregmin"); +			} else +				mnt->acregmin = option;  			break;  		case Opt_acregmax: -			if (match_int(args, &mnt->acregmax)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("acregmax"); +			} else +				mnt->acregmax = option;  			break;  		case Opt_acdirmin: -			if (match_int(args, &mnt->acdirmin)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("acdirmin"); +			} else +				mnt->acdirmin = option;  			break;  		case Opt_acdirmax: -			if (match_int(args, &mnt->acdirmax)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("acdirmax"); +			} else +				mnt->acdirmax = option;  			break;  		case Opt_actimeo: -			if (match_int(args, &option)) -				return 0; -			if (option < 0) -				return 0; -			mnt->acregmin = -			mnt->acregmax = -			mnt->acdirmin = -			mnt->acdirmax = option; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("actimeo"); +			} else +				mnt->acregmin = mnt->acregmax = +				mnt->acdirmin = mnt->acdirmax = option;  			break;  		case Opt_namelen: -			if (match_int(args, &mnt->namlen)) -				return 0; +			if (match_int(args, &option) || option < 0) { +				errors++; +				nfs_parse_invalid_value("namlen"); +			} else +				mnt->namlen = option;  			break;  		case Opt_mountport: -			if (match_int(args, &option)) -				return 0; -			if (option < 0 || option > 65535) -				return 0; -			mnt->mount_server.port = option; +			if (match_int(args, &option) || +			    option < 0 || option > USHORT_MAX) { +				errors++; +				nfs_parse_invalid_value("mountport"); +			} else +				mnt->mount_server.port = option;  			break;  		case Opt_mountvers: -			if (match_int(args, &option)) -				return 0; -			if (option < 0) -				return 0; -			mnt->mount_server.version = option; +			if (match_int(args, &option) || +			    option < NFS_MNT_VERSION || +			    option > NFS_MNT3_VERSION) { +				errors++; +				nfs_parse_invalid_value("mountvers"); +			} else +				mnt->mount_server.version = option;  			break;  		case Opt_nfsvers: -			if (match_int(args, &option)) -				return 0; +			if (match_int(args, &option)) { +				errors++; +				nfs_parse_invalid_value("nfsvers"); +				break; +			}  			switch (option) { -			case 2: +			case NFS2_VERSION:  				mnt->flags &= ~NFS_MOUNT_VER3;  				break; -			case 3: +			case NFS3_VERSION:  				mnt->flags |= NFS_MOUNT_VER3;  				break;  			default: -				goto out_unrec_vers; +				errors++; +				nfs_parse_invalid_value("nfsvers");  			}  			break; +		/* +		 * options that take text values +		 */  		case Opt_sec:  			string = match_strdup(args);  			if (string == NULL)  				goto out_nomem; -			token = match_token(string, nfs_secflavor_tokens, args); +			rc = nfs_parse_security_flavors(string, mnt);  			kfree(string); - -			/* -			 * The flags setting is for v2/v3.  The flavor_len -			 * setting is for v4.  v2/v3 also need to know the -			 * difference between NULL and UNIX. -			 */ -			switch (token) { -			case Opt_sec_none: -				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 0; -				mnt->auth_flavors[0] = RPC_AUTH_NULL; -				break; -			case Opt_sec_sys: -				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 0; -				mnt->auth_flavors[0] = RPC_AUTH_UNIX; -				break; -			case Opt_sec_krb5: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; -				break; -			case Opt_sec_krb5i: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; -				break; -			case Opt_sec_krb5p: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; -				break; -			case Opt_sec_lkey: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; -				break; -			case Opt_sec_lkeyi: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; -				break; -			case Opt_sec_lkeyp: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; -				break; -			case Opt_sec_spkm: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; -				break; -			case Opt_sec_spkmi: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; -				break; -			case Opt_sec_spkmp: -				mnt->flags |= NFS_MOUNT_SECFLAVOUR; -				mnt->auth_flavor_len = 1; -				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; -				break; -			default: -				goto out_unrec_sec; +			if (!rc) { +				errors++; +				dfprintk(MOUNT, "NFS:   unrecognized " +						"security flavor\n");  			}  			break;  		case Opt_proto: @@ -1026,24 +1190,20 @@ static int nfs_parse_mount_options(char *raw,  			case Opt_xprt_udp:  				mnt->flags &= ~NFS_MOUNT_TCP;  				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; -				mnt->timeo = 7; -				mnt->retrans = 5;  				break;  			case Opt_xprt_tcp:  				mnt->flags |= NFS_MOUNT_TCP;  				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; -				mnt->timeo = 600; -				mnt->retrans = 2;  				break;  			case Opt_xprt_rdma:  				/* vector side protocols to TCP */  				mnt->flags |= NFS_MOUNT_TCP;  				mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; -				mnt->timeo = 600; -				mnt->retrans = 2;  				break;  			default: -				goto out_unrec_xprt; +				errors++; +				dfprintk(MOUNT, "NFS:   unrecognized " +						"transport protocol\n");  			}  			break;  		case Opt_mountproto: @@ -1063,16 +1223,19 @@ static int nfs_parse_mount_options(char *raw,  				break;  			case Opt_xprt_rdma: /* not used for side protocols */  			default: -				goto out_unrec_xprt; +				errors++; +				dfprintk(MOUNT, "NFS:   unrecognized " +						"transport protocol\n");  			}  			break;  		case Opt_addr:  			string = match_strdup(args);  			if (string == NULL)  				goto out_nomem; -			nfs_parse_server_address(string, (struct sockaddr *) -						 &mnt->nfs_server.address, -						 &mnt->nfs_server.addrlen); +			nfs_parse_ip_address(string, strlen(string), +					     (struct sockaddr *) +						&mnt->nfs_server.address, +					     &mnt->nfs_server.addrlen);  			kfree(string);  			break;  		case Opt_clientaddr: @@ -1093,24 +1256,33 @@ static int nfs_parse_mount_options(char *raw,  			string = match_strdup(args);  			if (string == NULL)  				goto out_nomem; -			nfs_parse_server_address(string, (struct sockaddr *) -						 &mnt->mount_server.address, -						 &mnt->mount_server.addrlen); +			nfs_parse_ip_address(string, strlen(string), +					     (struct sockaddr *) +						&mnt->mount_server.address, +					     &mnt->mount_server.addrlen);  			kfree(string);  			break; +		/* +		 * Special options +		 */ +		case Opt_sloppy: +			sloppy = 1; +			dfprintk(MOUNT, "NFS:   relaxing parsing rules\n"); +			break;  		case Opt_userspace:  		case Opt_deprecated: +			dfprintk(MOUNT, "NFS:   ignoring mount option " +					"'%s'\n", p);  			break;  		default: -			goto out_unknown; +			errors++; +			dfprintk(MOUNT, "NFS:   unrecognized mount option " +					"'%s'\n", p);  		}  	} -	nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, -				mnt->nfs_server.port); -  	return 1;  out_nomem: @@ -1120,21 +1292,6 @@ out_security_failure:  	free_secdata(secdata);  	printk(KERN_INFO "NFS: security options invalid: %d\n", rc);  	return 0; -out_unrec_vers: -	printk(KERN_INFO "NFS: unrecognized NFS version number\n"); -	return 0; - -out_unrec_xprt: -	printk(KERN_INFO "NFS: unrecognized transport protocol\n"); -	return 0; - -out_unrec_sec: -	printk(KERN_INFO "NFS: unrecognized security flavor\n"); -	return 0; - -out_unknown: -	printk(KERN_INFO "NFS: unknown mount option: %s\n", p); -	return 0;  }  /* @@ -1188,11 +1345,146 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,  	if (status == 0)  		return 0; -	dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", +	dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",  			hostname, status);  	return status;  } +static int nfs_parse_simple_hostname(const char *dev_name, +				     char **hostname, size_t maxnamlen, +				     char **export_path, size_t maxpathlen) +{ +	size_t len; +	char *colon, *comma; + +	colon = strchr(dev_name, ':'); +	if (colon == NULL) +		goto out_bad_devname; + +	len = colon - dev_name; +	if (len > maxnamlen) +		goto out_hostname; + +	/* N.B. caller will free nfs_server.hostname in all cases */ +	*hostname = kstrndup(dev_name, len, GFP_KERNEL); +	if (!*hostname) +		goto out_nomem; + +	/* kill possible hostname list: not supported */ +	comma = strchr(*hostname, ','); +	if (comma != NULL) { +		if (comma == *hostname) +			goto out_bad_devname; +		*comma = '\0'; +	} + +	colon++; +	len = strlen(colon); +	if (len > maxpathlen) +		goto out_path; +	*export_path = kstrndup(colon, len, GFP_KERNEL); +	if (!*export_path) +		goto out_nomem; + +	dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); +	return 0; + +out_bad_devname: +	dfprintk(MOUNT, "NFS: device name not in host:path format\n"); +	return -EINVAL; + +out_nomem: +	dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); +	return -ENOMEM; + +out_hostname: +	dfprintk(MOUNT, "NFS: server hostname too long\n"); +	return -ENAMETOOLONG; + +out_path: +	dfprintk(MOUNT, "NFS: export pathname too long\n"); +	return -ENAMETOOLONG; +} + +/* + * Hostname has square brackets around it because it contains one or + * more colons.  We look for the first closing square bracket, and a + * colon must follow it. + */ +static int nfs_parse_protected_hostname(const char *dev_name, +					char **hostname, size_t maxnamlen, +					char **export_path, size_t maxpathlen) +{ +	size_t len; +	char *start, *end; + +	start = (char *)(dev_name + 1); + +	end = strchr(start, ']'); +	if (end == NULL) +		goto out_bad_devname; +	if (*(end + 1) != ':') +		goto out_bad_devname; + +	len = end - start; +	if (len > maxnamlen) +		goto out_hostname; + +	/* N.B. caller will free nfs_server.hostname in all cases */ +	*hostname = kstrndup(start, len, GFP_KERNEL); +	if (*hostname == NULL) +		goto out_nomem; + +	end += 2; +	len = strlen(end); +	if (len > maxpathlen) +		goto out_path; +	*export_path = kstrndup(end, len, GFP_KERNEL); +	if (!*export_path) +		goto out_nomem; + +	return 0; + +out_bad_devname: +	dfprintk(MOUNT, "NFS: device name not in host:path format\n"); +	return -EINVAL; + +out_nomem: +	dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); +	return -ENOMEM; + +out_hostname: +	dfprintk(MOUNT, "NFS: server hostname too long\n"); +	return -ENAMETOOLONG; + +out_path: +	dfprintk(MOUNT, "NFS: export pathname too long\n"); +	return -ENAMETOOLONG; +} + +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path.  If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, +			     char **hostname, size_t maxnamlen, +			     char **export_path, size_t maxpathlen) +{ +	if (*dev_name == '[') +		return nfs_parse_protected_hostname(dev_name, +						    hostname, maxnamlen, +						    export_path, maxpathlen); + +	return nfs_parse_simple_hostname(dev_name, +					 hostname, maxnamlen, +					 export_path, maxpathlen); +} +  /*   * Validate the NFS2/NFS3 mount data   * - fills in the mount root filehandle @@ -1222,16 +1514,14 @@ static int nfs_validate_mount_data(void *options,  	args->flags		= (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);  	args->rsize		= NFS_MAX_FILE_IO_SIZE;  	args->wsize		= NFS_MAX_FILE_IO_SIZE; -	args->timeo		= 600; -	args->retrans		= 2; -	args->acregmin		= 3; -	args->acregmax		= 60; -	args->acdirmin		= 30; -	args->acdirmax		= 60; +	args->acregmin		= NFS_DEF_ACREGMIN; +	args->acregmax		= NFS_DEF_ACREGMAX; +	args->acdirmin		= NFS_DEF_ACDIRMIN; +	args->acdirmax		= NFS_DEF_ACDIRMAX;  	args->mount_server.port	= 0;	/* autobind unless user sets port */ -	args->mount_server.protocol = XPRT_TRANSPORT_UDP;  	args->nfs_server.port	= 0;	/* autobind unless user sets port */  	args->nfs_server.protocol = XPRT_TRANSPORT_TCP; +	args->auth_flavors[0]	= RPC_AUTH_UNIX;  	switch (data->version) {  	case 1: @@ -1289,7 +1579,9 @@ static int nfs_validate_mount_data(void *options,  		args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);  		args->namlen		= data->namlen;  		args->bsize		= data->bsize; -		args->auth_flavors[0]	= data->pseudoflavor; + +		if (data->flags & NFS_MOUNT_SECFLAVOUR) +			args->auth_flavors[0] = data->pseudoflavor;  		if (!args->nfs_server.hostname)  			goto out_nomem; @@ -1321,8 +1613,6 @@ static int nfs_validate_mount_data(void *options,  		break;  	default: { -		unsigned int len; -		char *c;  		int status;  		if (nfs_parse_mount_options((char *)options, args) == 0) @@ -1332,21 +1622,22 @@ static int nfs_validate_mount_data(void *options,  						&args->nfs_server.address))  			goto out_no_address; -		c = strchr(dev_name, ':'); -		if (c == NULL) -			return -EINVAL; -		len = c - dev_name; -		/* N.B. caller will free nfs_server.hostname in all cases */ -		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); -		if (!args->nfs_server.hostname) -			goto out_nomem; +		nfs_set_port((struct sockaddr *)&args->nfs_server.address, +				args->nfs_server.port); -		c++; -		if (strlen(c) > NFS_MAXPATHLEN) -			return -ENAMETOOLONG; -		args->nfs_server.export_path = c; +		nfs_set_mount_transport_protocol(args); + +		status = nfs_parse_devname(dev_name, +					   &args->nfs_server.hostname, +					   PAGE_SIZE, +					   &args->nfs_server.export_path, +					   NFS_MAXPATHLEN); +		if (!status) +			status = nfs_try_mount(args, mntfh); + +		kfree(args->nfs_server.export_path); +		args->nfs_server.export_path = NULL; -		status = nfs_try_mount(args, mntfh);  		if (status)  			return status; @@ -1354,9 +1645,6 @@ static int nfs_validate_mount_data(void *options,  		}  	} -	if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) -		args->auth_flavors[0] = RPC_AUTH_UNIX; -  #ifndef CONFIG_NFS_V3  	if (args->flags & NFS_MOUNT_VER3)  		goto out_v3_not_compiled; @@ -1396,6 +1684,80 @@ out_invalid_fh:  	return -EINVAL;  } +static int +nfs_compare_remount_data(struct nfs_server *nfss, +			 struct nfs_parsed_mount_data *data) +{ +	if (data->flags != nfss->flags || +	    data->rsize != nfss->rsize || +	    data->wsize != nfss->wsize || +	    data->retrans != nfss->client->cl_timeout->to_retries || +	    data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || +	    data->acregmin != nfss->acregmin / HZ || +	    data->acregmax != nfss->acregmax / HZ || +	    data->acdirmin != nfss->acdirmin / HZ || +	    data->acdirmax != nfss->acdirmax / HZ || +	    data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || +	    data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || +	    memcmp(&data->nfs_server.address, &nfss->nfs_client->cl_addr, +		   data->nfs_server.addrlen) != 0) +		return -EINVAL; + +	return 0; +} + +static int +nfs_remount(struct super_block *sb, int *flags, char *raw_data) +{ +	int error; +	struct nfs_server *nfss = sb->s_fs_info; +	struct nfs_parsed_mount_data *data; +	struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; +	struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; +	u32 nfsvers = nfss->nfs_client->rpc_ops->version; + +	/* +	 * Userspace mount programs that send binary options generally send +	 * them populated with default values. We have no way to know which +	 * ones were explicitly specified. Fall back to legacy behavior and +	 * just return success. +	 */ +	if ((nfsvers == 4 && options4->version == 1) || +	    (nfsvers <= 3 && options->version >= 1 && +	     options->version <= 6)) +		return 0; + +	data = kzalloc(sizeof(*data), GFP_KERNEL); +	if (data == NULL) +		return -ENOMEM; + +	/* fill out struct with values from existing mount */ +	data->flags = nfss->flags; +	data->rsize = nfss->rsize; +	data->wsize = nfss->wsize; +	data->retrans = nfss->client->cl_timeout->to_retries; +	data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; +	data->acregmin = nfss->acregmin / HZ; +	data->acregmax = nfss->acregmax / HZ; +	data->acdirmin = nfss->acdirmin / HZ; +	data->acdirmax = nfss->acdirmax / HZ; +	data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; +	data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; +	memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, +		data->nfs_server.addrlen); + +	/* overwrite those values with any that were specified */ +	error = nfs_parse_mount_options((char *)options, data); +	if (error < 0) +		goto out; + +	/* compare new mount options with old ones */ +	error = nfs_compare_remount_data(nfss, data); +out: +	kfree(data); +	return error; +} +  /*   * Initialise the common bits of the superblock   */ @@ -1811,14 +2173,13 @@ static int nfs4_validate_mount_data(void *options,  	args->rsize		= NFS_MAX_FILE_IO_SIZE;  	args->wsize		= NFS_MAX_FILE_IO_SIZE; -	args->timeo		= 600; -	args->retrans		= 2; -	args->acregmin		= 3; -	args->acregmax		= 60; -	args->acdirmin		= 30; -	args->acdirmax		= 60; +	args->acregmin		= NFS_DEF_ACREGMIN; +	args->acregmax		= NFS_DEF_ACREGMAX; +	args->acdirmin		= NFS_DEF_ACDIRMIN; +	args->acdirmax		= NFS_DEF_ACDIRMAX;  	args->nfs_server.port	= NFS_PORT; /* 2049 unless user set port= */ -	args->nfs_server.protocol = XPRT_TRANSPORT_TCP; +	args->auth_flavors[0]	= RPC_AUTH_UNIX; +	args->auth_flavor_len	= 0;  	switch (data->version) {  	case 1: @@ -1834,18 +2195,13 @@ static int nfs4_validate_mount_data(void *options,  						&args->nfs_server.address))  			goto out_no_address; -		switch (data->auth_flavourlen) { -		case 0: -			args->auth_flavors[0] = RPC_AUTH_UNIX; -			break; -		case 1: +		if (data->auth_flavourlen) { +			if (data->auth_flavourlen > 1) +				goto out_inval_auth;  			if (copy_from_user(&args->auth_flavors[0],  					   data->auth_flavours,  					   sizeof(args->auth_flavors[0])))  				return -EFAULT; -			break; -		default: -			goto out_inval_auth;  		}  		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); @@ -1879,10 +2235,11 @@ static int nfs4_validate_mount_data(void *options,  		args->acdirmin	= data->acdirmin;  		args->acdirmax	= data->acdirmax;  		args->nfs_server.protocol = data->proto; +		nfs_validate_transport_protocol(args);  		break;  	default: { -		unsigned int len; +		int status;  		if (nfs_parse_mount_options((char *)options, args) == 0)  			return -EINVAL; @@ -1891,44 +2248,25 @@ static int nfs4_validate_mount_data(void *options,  						&args->nfs_server.address))  			return -EINVAL; -		switch (args->auth_flavor_len) { -		case 0: -			args->auth_flavors[0] = RPC_AUTH_UNIX; -			break; -		case 1: -			break; -		default: -			goto out_inval_auth; -		} +		nfs_set_port((struct sockaddr *)&args->nfs_server.address, +				args->nfs_server.port); -		/* -		 * Split "dev_name" into "hostname:mntpath". -		 */ -		c = strchr(dev_name, ':'); -		if (c == NULL) -			return -EINVAL; -		/* while calculating len, pretend ':' is '\0' */ -		len = c - dev_name; -		if (len > NFS4_MAXNAMLEN) -			return -ENAMETOOLONG; -		/* N.B. caller will free nfs_server.hostname in all cases */ -		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); -		if (!args->nfs_server.hostname) -			goto out_nomem; - -		c++;			/* step over the ':' */ -		len = strlen(c); -		if (len > NFS4_MAXPATHLEN) -			return -ENAMETOOLONG; -		args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); -		if (!args->nfs_server.export_path) -			goto out_nomem; +		nfs_validate_transport_protocol(args); -		dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); +		if (args->auth_flavor_len > 1) +			goto out_inval_auth;  		if (args->client_address == NULL)  			goto out_no_client_address; +		status = nfs_parse_devname(dev_name, +					   &args->nfs_server.hostname, +					   NFS4_MAXNAMLEN, +					   &args->nfs_server.export_path, +					   NFS4_MAXPATHLEN); +		if (status < 0) +			return status; +  		break;  		}  	} @@ -1944,10 +2282,6 @@ out_inval_auth:  		 data->auth_flavourlen);  	return -EINVAL; -out_nomem: -	dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n"); -	return -ENOMEM; -  out_no_address:  	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");  	return -EINVAL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f333848fd3be..feca8c648766 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -34,9 +34,6 @@  /*   * Local function declarations   */ -static struct nfs_page * nfs_update_request(struct nfs_open_context*, -					    struct page *, -					    unsigned int, unsigned int);  static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,  				  struct inode *inode, int ioflags);  static void nfs_redirty_request(struct nfs_page *req); @@ -169,29 +166,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int  	SetPageUptodate(page);  } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, -		unsigned int offset, unsigned int count) -{ -	struct nfs_page	*req; -	int ret; - -	for (;;) { -		req = nfs_update_request(ctx, page, offset, count); -		if (!IS_ERR(req)) -			break; -		ret = PTR_ERR(req); -		if (ret != -EBUSY) -			return ret; -		ret = nfs_wb_page(page->mapping->host, page); -		if (ret != 0) -			return ret; -	} -	/* Update file length */ -	nfs_grow_file(page, offset, count); -	nfs_clear_page_tag_locked(req); -	return 0; -} -  static int wb_priority(struct writeback_control *wbc)  {  	if (wbc->for_reclaim) @@ -268,12 +242,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,  			return ret;  		spin_lock(&inode->i_lock);  	} -	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { -		/* This request is marked for commit */ +	if (test_bit(PG_CLEAN, &req->wb_flags)) {  		spin_unlock(&inode->i_lock); -		nfs_clear_page_tag_locked(req); -		nfs_pageio_complete(pgio); -		return 0; +		BUG();  	}  	if (nfs_set_page_writeback(page) != 0) {  		spin_unlock(&inode->i_lock); @@ -355,11 +326,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)  /*   * Insert a write request into an inode   */ -static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)  {  	struct nfs_inode *nfsi = NFS_I(inode);  	int error; +	error = radix_tree_preload(GFP_NOFS); +	if (error != 0) +		goto out; + +	/* Lock the request! */ +	nfs_lock_request_dontget(req); + +	spin_lock(&inode->i_lock);  	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);  	BUG_ON(error);  	if (!nfsi->npages) { @@ -373,6 +352,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)  	kref_get(&req->wb_kref);  	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,  				NFS_PAGE_TAG_LOCKED); +	spin_unlock(&inode->i_lock); +	radix_tree_preload_end(); +out: +	return error;  }  /* @@ -405,19 +388,6 @@ nfs_mark_request_dirty(struct nfs_page *req)  	__set_page_dirty_nobuffers(req->wb_page);  } -/* - * Check if a request is dirty - */ -static inline int -nfs_dirty_request(struct nfs_page *req) -{ -	struct page *page = req->wb_page; - -	if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) -		return 0; -	return !PageWriteback(page); -} -  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)  /*   * Add a request to the inode's commit list. @@ -430,7 +400,7 @@ nfs_mark_request_commit(struct nfs_page *req)  	spin_lock(&inode->i_lock);  	nfsi->ncommit++; -	set_bit(PG_NEED_COMMIT, &(req)->wb_flags); +	set_bit(PG_CLEAN, &(req)->wb_flags);  	radix_tree_tag_set(&nfsi->nfs_page_tree,  			req->wb_index,  			NFS_PAGE_TAG_COMMIT); @@ -440,6 +410,19 @@ nfs_mark_request_commit(struct nfs_page *req)  	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);  } +static int +nfs_clear_request_commit(struct nfs_page *req) +{ +	struct page *page = req->wb_page; + +	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { +		dec_zone_page_state(page, NR_UNSTABLE_NFS); +		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); +		return 1; +	} +	return 0; +} +  static inline  int nfs_write_need_commit(struct nfs_write_data *data)  { @@ -449,7 +432,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)  static inline  int nfs_reschedule_unstable_write(struct nfs_page *req)  { -	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { +	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {  		nfs_mark_request_commit(req);  		return 1;  	} @@ -465,6 +448,12 @@ nfs_mark_request_commit(struct nfs_page *req)  {  } +static inline int +nfs_clear_request_commit(struct nfs_page *req) +{ +	return 0; +} +  static inline  int nfs_write_need_commit(struct nfs_write_data *data)  { @@ -522,11 +511,8 @@ static void nfs_cancel_commit_list(struct list_head *head)  	while(!list_empty(head)) {  		req = nfs_list_entry(head->next); -		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); -		dec_bdi_stat(req->wb_page->mapping->backing_dev_info, -				BDI_RECLAIMABLE);  		nfs_list_remove_request(req); -		clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); +		nfs_clear_request_commit(req);  		nfs_inode_remove_request(req);  		nfs_unlock_request(req);  	} @@ -564,110 +550,124 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg  #endif  /* - * Try to update any existing write request, or create one if there is none. - * In order to match, the request's credentials must match those of - * the calling process. + * Search for an existing write request, and attempt to update + * it to reflect a new dirty region on a given page.   * - * Note: Should always be called with the Page Lock held! + * If the attempt fails, then the existing request is flushed out + * to disk.   */ -static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, -		struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page *nfs_try_to_update_request(struct inode *inode, +		struct page *page, +		unsigned int offset, +		unsigned int bytes)  { -	struct address_space *mapping = page->mapping; -	struct inode *inode = mapping->host; -	struct nfs_page		*req, *new = NULL; -	pgoff_t		rqend, end; +	struct nfs_page *req; +	unsigned int rqend; +	unsigned int end; +	int error; + +	if (!PagePrivate(page)) +		return NULL;  	end = offset + bytes; +	spin_lock(&inode->i_lock);  	for (;;) { -		/* Loop over all inode entries and see if we find -		 * A request for the page we wish to update +		req = nfs_page_find_request_locked(page); +		if (req == NULL) +			goto out_unlock; + +		rqend = req->wb_offset + req->wb_bytes; +		/* +		 * Tell the caller to flush out the request if +		 * the offsets are non-contiguous. +		 * Note: nfs_flush_incompatible() will already +		 * have flushed out requests having wrong owners.  		 */ -		if (new) { -			if (radix_tree_preload(GFP_NOFS)) { -				nfs_release_request(new); -				return ERR_PTR(-ENOMEM); -			} -		} +		if (offset > rqend +		    || end < req->wb_offset) +			goto out_flushme; -		spin_lock(&inode->i_lock); -		req = nfs_page_find_request_locked(page); -		if (req) { -			if (!nfs_set_page_tag_locked(req)) { -				int error; - -				spin_unlock(&inode->i_lock); -				error = nfs_wait_on_request(req); -				nfs_release_request(req); -				if (error < 0) { -					if (new) { -						radix_tree_preload_end(); -						nfs_release_request(new); -					} -					return ERR_PTR(error); -				} -				continue; -			} -			spin_unlock(&inode->i_lock); -			if (new) { -				radix_tree_preload_end(); -				nfs_release_request(new); -			} +		if (nfs_set_page_tag_locked(req))  			break; -		} -		if (new) { -			nfs_lock_request_dontget(new); -			nfs_inode_add_request(inode, new); -			spin_unlock(&inode->i_lock); -			radix_tree_preload_end(); -			req = new; -			goto zero_page; -		} +		/* The request is locked, so wait and then retry */  		spin_unlock(&inode->i_lock); - -		new = nfs_create_request(ctx, inode, page, offset, bytes); -		if (IS_ERR(new)) -			return new; +		error = nfs_wait_on_request(req); +		nfs_release_request(req); +		if (error != 0) +			goto out_err; +		spin_lock(&inode->i_lock);  	} -	/* We have a request for our page. -	 * If the creds don't match, or the -	 * page addresses don't match, -	 * tell the caller to wait on the conflicting -	 * request. -	 */ -	rqend = req->wb_offset + req->wb_bytes; -	if (req->wb_context != ctx -	    || req->wb_page != page -	    || !nfs_dirty_request(req) -	    || offset > rqend || end < req->wb_offset) { -		nfs_clear_page_tag_locked(req); -		return ERR_PTR(-EBUSY); -	} +	if (nfs_clear_request_commit(req)) +		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, +				req->wb_index, NFS_PAGE_TAG_COMMIT);  	/* Okay, the request matches. Update the region */  	if (offset < req->wb_offset) {  		req->wb_offset = offset;  		req->wb_pgbase = offset; -		req->wb_bytes = max(end, rqend) - req->wb_offset; -		goto zero_page;  	} -  	if (end > rqend)  		req->wb_bytes = end - req->wb_offset; - +	else +		req->wb_bytes = rqend - req->wb_offset; +out_unlock: +	spin_unlock(&inode->i_lock);  	return req; -zero_page: -	/* If this page might potentially be marked as up to date, -	 * then we need to zero any uninitalised data. */ -	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE -			&& !PageUptodate(req->wb_page)) -		zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); +out_flushme: +	spin_unlock(&inode->i_lock); +	nfs_release_request(req); +	error = nfs_wb_page(inode, page); +out_err: +	return ERR_PTR(error); +} + +/* + * Try to update an existing write request, or create one if there is none. + * + * Note: Should always be called with the Page Lock held to prevent races + * if we have to add a new request. Also assumes that the caller has + * already called nfs_flush_incompatible() if necessary. + */ +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, +		struct page *page, unsigned int offset, unsigned int bytes) +{ +	struct inode *inode = page->mapping->host; +	struct nfs_page	*req; +	int error; + +	req = nfs_try_to_update_request(inode, page, offset, bytes); +	if (req != NULL) +		goto out; +	req = nfs_create_request(ctx, inode, page, offset, bytes); +	if (IS_ERR(req)) +		goto out; +	error = nfs_inode_add_request(inode, req); +	if (error != 0) { +		nfs_release_request(req); +		req = ERR_PTR(error); +	} +out:  	return req;  } +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, +		unsigned int offset, unsigned int count) +{ +	struct nfs_page	*req; + +	req = nfs_setup_write_request(ctx, page, offset, count); +	if (IS_ERR(req)) +		return PTR_ERR(req); +	/* Update file length */ +	nfs_grow_file(page, offset, count); +	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); +	nfs_clear_page_tag_locked(req); +	return 0; +} +  int nfs_flush_incompatible(struct file *file, struct page *page)  {  	struct nfs_open_context *ctx = nfs_file_open_context(file); @@ -685,8 +685,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)  		req = nfs_page_find_request(page);  		if (req == NULL)  			return 0; -		do_flush = req->wb_page != page || req->wb_context != ctx -			|| !nfs_dirty_request(req); +		do_flush = req->wb_page != page || req->wb_context != ctx;  		nfs_release_request(req);  		if (!do_flush)  			return 0; @@ -721,10 +720,10 @@ int nfs_updatepage(struct file *file, struct page *page,  	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); -	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n", +	dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",  		file->f_path.dentry->d_parent->d_name.name,  		file->f_path.dentry->d_name.name, count, -		(long long)(page_offset(page) +offset)); +		(long long)(page_offset(page) + offset));  	/* If we're not using byte range locks, and we know the page  	 * is up to date, it may be more efficient to extend the write @@ -744,7 +743,7 @@ int nfs_updatepage(struct file *file, struct page *page,  	else  		__set_page_dirty_nobuffers(page); -        dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n", +	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",  			status, (long long)i_size_read(inode));  	return status;  } @@ -752,12 +751,7 @@ int nfs_updatepage(struct file *file, struct page *page,  static void nfs_writepage_release(struct nfs_page *req)  { -	if (PageError(req->wb_page)) { -		nfs_end_page_writeback(req->wb_page); -		nfs_inode_remove_request(req); -	} else if (!nfs_reschedule_unstable_write(req)) { -		/* Set the PG_uptodate flag */ -		nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); +	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {  		nfs_end_page_writeback(req->wb_page);  		nfs_inode_remove_request(req);  	} else @@ -834,7 +828,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,  	NFS_PROTO(inode)->write_setup(data, &msg);  	dprintk("NFS: %5u initiated write call " -		"(req %s/%Ld, %u bytes @ offset %Lu)\n", +		"(req %s/%lld, %u bytes @ offset %llu)\n",  		data->task.tk_pid,  		inode->i_sb->s_id,  		(long long)NFS_FILEID(inode), @@ -978,13 +972,13 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,  static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)  {  	struct nfs_write_data	*data = calldata; -	struct nfs_page		*req = data->req; -	dprintk("NFS: write (%s/%Ld %d@%Ld)", -		req->wb_context->path.dentry->d_inode->i_sb->s_id, -		(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), -		req->wb_bytes, -		(long long)req_offset(req)); +	dprintk("NFS: %5u write(%s/%lld %d@%lld)", +		task->tk_pid, +		data->req->wb_context->path.dentry->d_inode->i_sb->s_id, +		(long long) +		  NFS_FILEID(data->req->wb_context->path.dentry->d_inode), +		data->req->wb_bytes, (long long)req_offset(data->req));  	nfs_writeback_done(task, data);  } @@ -1058,7 +1052,8 @@ static void nfs_writeback_release_full(void *calldata)  		nfs_list_remove_request(req); -		dprintk("NFS: write (%s/%Ld %d@%Ld)", +		dprintk("NFS: %5u write (%s/%lld %d@%lld)", +			data->task.tk_pid,  			req->wb_context->path.dentry->d_inode->i_sb->s_id,  			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),  			req->wb_bytes, @@ -1078,8 +1073,6 @@ static void nfs_writeback_release_full(void *calldata)  			dprintk(" marked for commit\n");  			goto next;  		} -		/* Set the PG_uptodate flag? */ -		nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);  		dprintk(" OK\n");  remove_request:  		nfs_end_page_writeback(page); @@ -1133,7 +1126,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)  		static unsigned long    complain;  		if (time_before(complain, jiffies)) { -			dprintk("NFS: faulty NFS server %s:" +			dprintk("NFS:       faulty NFS server %s:"  				" (committed = %d) != (stable = %d)\n",  				NFS_SERVER(data->inode)->nfs_client->cl_hostname,  				resp->verf->committed, argp->stable); @@ -1297,12 +1290,9 @@ static void nfs_commit_release(void *calldata)  	while (!list_empty(&data->pages)) {  		req = nfs_list_entry(data->pages.next);  		nfs_list_remove_request(req); -		clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); -		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); -		dec_bdi_stat(req->wb_page->mapping->backing_dev_info, -				BDI_RECLAIMABLE); +		nfs_clear_request_commit(req); -		dprintk("NFS: commit (%s/%Ld %d@%Ld)", +		dprintk("NFS:       commit (%s/%lld %d@%lld)",  			req->wb_context->path.dentry->d_inode->i_sb->s_id,  			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),  			req->wb_bytes, @@ -1318,9 +1308,6 @@ static void nfs_commit_release(void *calldata)  		 * returned by the server against all stored verfs. */  		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {  			/* We have a match */ -			/* Set the PG_uptodate flag */ -			nfs_mark_uptodate(req->wb_page, req->wb_pgbase, -					req->wb_bytes);  			nfs_inode_remove_request(req);  			dprintk(" OK\n");  			goto next; @@ -1479,7 +1466,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)  		req = nfs_page_find_request(page);  		if (req == NULL)  			goto out; -		if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { +		if (test_bit(PG_CLEAN, &req->wb_flags)) {  			nfs_release_request(req);  			break;  		} diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4d4760e687c3..702fa577aa6e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -381,7 +381,7 @@ static int do_probe_callback(void *data)  		.program	= &cb_program,  		.version	= nfs_cb_version[1]->number,  		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ -		.flags		= (RPC_CLNT_CREATE_NOPING), +		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),  	};  	struct rpc_message msg = {  		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], diff --git a/include/linux/inet.h b/include/linux/inet.h index 1354080cf8cf..4cca05c9678e 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -44,6 +44,13 @@  #include <linux/types.h> +/* + * These mimic similar macros defined in user-space for inet_ntop(3). + * See /usr/include/netinet/in.h . + */ +#define INET_ADDRSTRLEN		(16) +#define INET6_ADDRSTRLEN	(48) +  extern __be32 in_aton(const char *str);  extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end);  extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 27d6a8d98cef..29d261918734 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -12,9 +12,19 @@  #include <linux/magic.h>  /* Default timeout values */ +#define NFS_DEF_UDP_TIMEO	(11) +#define NFS_DEF_UDP_RETRANS	(3) +#define NFS_DEF_TCP_TIMEO	(600) +#define NFS_DEF_TCP_RETRANS	(2) +  #define NFS_MAX_UDP_TIMEOUT	(60*HZ)  #define NFS_MAX_TCP_TIMEOUT	(600*HZ) +#define NFS_DEF_ACREGMIN	(3) +#define NFS_DEF_ACREGMAX	(60) +#define NFS_DEF_ACDIRMIN	(30) +#define NFS_DEF_ACDIRMAX	(60) +  /*   * When flushing a cluster of dirty pages, there can be different   * strategies: diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h new file mode 100644 index 000000000000..1cb9a3fed2b3 --- /dev/null +++ b/include/linux/nfs_iostat.h @@ -0,0 +1,119 @@ +/* + *  User-space visible declarations for NFS client per-mount + *  point statistics + * + *  Copyright (C) 2005, 2006 Chuck Lever <[email protected]> + * + *  NFS client per-mount statistics provide information about the + *  health of the NFS client and the health of each NFS mount point. + *  Generally these are not for detailed problem diagnosis, but + *  simply to indicate that there is a problem. + * + *  These counters are not meant to be human-readable, but are meant + *  to be integrated into system monitoring tools such as "sar" and + *  "iostat".  As such, the counters are sampled by the tools over + *  time, and are never zeroed after a file system is mounted. + *  Moving averages can be computed by the tools by taking the + *  difference between two instantaneous samples  and dividing that + *  by the time between the samples. + */ + +#ifndef _LINUX_NFS_IOSTAT +#define _LINUX_NFS_IOSTAT + +#define NFS_IOSTAT_VERS		"1.0" + +/* + * NFS byte counters + * + * 1.  SERVER - the number of payload bytes read from or written + *     to the server by the NFS client via an NFS READ or WRITE + *     request. + * + * 2.  NORMAL - the number of bytes read or written by applications + *     via the read(2) and write(2) system call interfaces. + * + * 3.  DIRECT - the number of bytes read or written from files + *     opened with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out + * of the NFS client.  Comparing the number of bytes requested by + * an application with the number of bytes the client requests from + * the server can provide an indication of client efficiency + * (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods + * are in use.  DIRECT by itself shows whether there is any O_DIRECT + * traffic.  NORMAL + DIRECT shows how much data is going through + * the system call interface.  A large amount of SERVER traffic + * without much NORMAL or DIRECT traffic shows that applications + * are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + * + * NB: When adding new byte counters, please include the measured + * units in the name of each byte counter to help users of this + * interface determine what exactly is being counted. + */ +enum nfs_stat_bytecounters { +	NFSIOS_NORMALREADBYTES = 0, +	NFSIOS_NORMALWRITTENBYTES, +	NFSIOS_DIRECTREADBYTES, +	NFSIOS_DIRECTWRITTENBYTES, +	NFSIOS_SERVERREADBYTES, +	NFSIOS_SERVERWRITTENBYTES, +	NFSIOS_READPAGES, +	NFSIOS_WRITEPAGES, +	__NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client + * activity without enabling NFS trace debugging.  The counters + * show the rate at which VFS requests are made, and how often the + * client invalidates its data and attribute caches.  This allows + * system administrators to monitor such things as how close-to-open + * is working, and answer questions such as "why are there so many + * GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, + * silly renames due to close-after-delete, and operations that + * change the size of a file (such operations can often be the + * source of data corruption if applications aren't using file + * locking properly). + */ +enum nfs_stat_eventcounters { +	NFSIOS_INODEREVALIDATE = 0, +	NFSIOS_DENTRYREVALIDATE, +	NFSIOS_DATAINVALIDATE, +	NFSIOS_ATTRINVALIDATE, +	NFSIOS_VFSOPEN, +	NFSIOS_VFSLOOKUP, +	NFSIOS_VFSACCESS, +	NFSIOS_VFSUPDATEPAGE, +	NFSIOS_VFSREADPAGE, +	NFSIOS_VFSREADPAGES, +	NFSIOS_VFSWRITEPAGE, +	NFSIOS_VFSWRITEPAGES, +	NFSIOS_VFSGETDENTS, +	NFSIOS_VFSSETATTR, +	NFSIOS_VFSFLUSH, +	NFSIOS_VFSFSYNC, +	NFSIOS_VFSLOCK, +	NFSIOS_VFSRELEASE, +	NFSIOS_CONGESTIONWAIT, +	NFSIOS_SETATTRTRUNC, +	NFSIOS_EXTENDWRITE, +	NFSIOS_SILLYRENAME, +	NFSIOS_SHORTREAD, +	NFSIOS_SHORTWRITE, +	NFSIOS_DELAY, +	__NFSIOS_COUNTSMAX, +}; + +#endif	/* _LINUX_NFS_IOSTAT */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index a1676e19e491..3c60685d972b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,9 +27,12 @@  /*   * Valid flags for a dirty buffer   */ -#define PG_BUSY			0 -#define PG_NEED_COMMIT		1 -#define PG_NEED_RESCHED		2 +enum { +	PG_BUSY = 0, +	PG_CLEAN, +	PG_NEED_COMMIT, +	PG_NEED_RESCHED, +};  struct nfs_inode;  struct nfs_page { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 24263bb8e0be..8c77c11224d1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -829,9 +829,8 @@ struct nfs_rpc_ops {  	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);  	void	(*commit_setup) (struct nfs_write_data *, struct rpc_message *);  	int	(*commit_done) (struct rpc_task *, struct nfs_write_data *); -	int	(*file_open)   (struct inode *, struct file *); -	int	(*file_release) (struct inode *, struct file *);  	int	(*lock)(struct file *, int, struct file_lock *); +	int	(*lock_check_bounds)(const struct file_lock *);  	void	(*clear_acl_cache)(struct inode *);  }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6fff7f82ef12..e5bfe01ee305 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -42,7 +42,8 @@ struct rpc_clnt {  	unsigned int		cl_softrtry : 1,/* soft timeouts */  				cl_discrtry : 1,/* disconnect before retry */ -				cl_autobind : 1;/* use getport() */ +				cl_autobind : 1,/* use getport() */ +				cl_chatty   : 1;/* be verbose */  	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */  	const struct rpc_timeout *cl_timeout;	/* Timeout strategy */ @@ -114,6 +115,7 @@ struct rpc_create_args {  #define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 3)  #define RPC_CLNT_CREATE_NOPING		(1UL << 4)  #define RPC_CLNT_CREATE_DISCRTRY	(1UL << 5) +#define RPC_CLNT_CREATE_QUIET		(1UL << 6)  struct rpc_clnt *rpc_create(struct rpc_create_args *args);  struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *, @@ -123,6 +125,9 @@ void		rpc_shutdown_client(struct rpc_clnt *);  void		rpc_release_client(struct rpc_clnt *);  int		rpcb_register(u32, u32, int, unsigned short, int *); +int		rpcb_v4_register(const u32 program, const u32 version, +				 const struct sockaddr *address, +				 const char *netid, int *result);  int		rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);  void		rpcb_getport_async(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d1a5c8c1a0f1..64981a2f1cae 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -135,7 +135,6 @@ struct rpc_task_setup {  #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)  #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)  #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_DO_CALLBACK(t)	((t)->tk_callback != NULL)  #define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)  #define RPC_TASK_RUNNING	0 diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f5d5da..834a83199bdf 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops;  # define RPCDBG_FACILITY	RPCDBG_AUTH  #endif -#define NFS_NGROUPS	16 - -#define GSS_CRED_SLACK		1024		/* XXX: unused */ +#define GSS_CRED_SLACK		1024  /* length of a krb5 verifier (48), plus data added before arguments when   * using integrity (two 4-byte integers): */  #define GSS_VERF_SLACK		100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE  0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c)      ((c > 0x1f) && (c < 0x7f)) -  struct gss_auth {  	struct kref kref;  	struct rpc_auth rpc_auth; @@ -146,7 +135,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)  	q = (const void *)((const char *)p + len);  	if (unlikely(q > end || q < p))  		return ERR_PTR(-EFAULT); -	dest->data = kmemdup(p, len, GFP_KERNEL); +	dest->data = kmemdup(p, len, GFP_NOFS);  	if (unlikely(dest->data == NULL))  		return ERR_PTR(-ENOMEM);  	dest->len = len; @@ -171,7 +160,7 @@ gss_alloc_context(void)  {  	struct gss_cl_ctx *ctx; -	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); +	ctx = kzalloc(sizeof(*ctx), GFP_NOFS);  	if (ctx != NULL) {  		ctx->gc_proc = RPC_GSS_PROC_DATA;  		ctx->gc_seq = 1;	/* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -272,7 +261,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)  	return NULL;  } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue.   * If an upcall owned by our uid already exists, then we return a reference   * to that upcall instead of adding the new upcall.   */ @@ -341,7 +330,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)  {  	struct gss_upcall_msg *gss_msg; -	gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); +	gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);  	if (gss_msg != NULL) {  		INIT_LIST_HEAD(&gss_msg->list);  		rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -493,7 +482,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)  {  	const void *p, *end;  	void *buf; -	struct rpc_clnt *clnt;  	struct gss_upcall_msg *gss_msg;  	struct inode *inode = filp->f_path.dentry->d_inode;  	struct gss_cl_ctx *ctx; @@ -503,11 +491,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)  	if (mlen > MSG_BUF_MAXSIZE)  		goto out;  	err = -ENOMEM; -	buf = kmalloc(mlen, GFP_KERNEL); +	buf = kmalloc(mlen, GFP_NOFS);  	if (!buf)  		goto out; -	clnt = RPC_I(inode)->private;  	err = -EFAULT;  	if (copy_from_user(buf, src, mlen))  		goto err; @@ -806,7 +793,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)  	dprintk("RPC:       gss_create_cred for uid %d, flavor %d\n",  		acred->uid, auth->au_flavor); -	if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) +	if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))  		goto out_err;  	rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba545d7..ef45eba22485 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)  	q = (const void *)((const char *)p + len);  	if (unlikely(q > end || q < p))  		return ERR_PTR(-EFAULT); -	res->data = kmemdup(p, len, GFP_KERNEL); +	res->data = kmemdup(p, len, GFP_NOFS);  	if (unlikely(res->data == NULL))  		return ERR_PTR(-ENOMEM);  	res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p,  	struct	krb5_ctx *ctx;  	int tmp; -	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) +	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))  		goto out_err;  	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6e4514..035e1dd6af1b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)  	q = (const void *)((const char *)p + len);  	if (unlikely(q > end || q < p))  		return ERR_PTR(-EFAULT); -	res->data = kmemdup(p, len, GFP_KERNEL); +	res->data = kmemdup(p, len, GFP_NOFS);  	if (unlikely(res->data == NULL))  		return ERR_PTR(-ENOMEM);  	return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len,  	struct	spkm3_ctx *ctx;  	int	version; -	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) +	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))  		goto out_err;  	p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241ad267..3308157436d2 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)  int  decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)  { -	if (!(out->data = kzalloc(explen,GFP_KERNEL))) +	if (!(out->data = kzalloc(explen,GFP_NOFS)))  		return 0;  	out->len = explen;  	memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b90bdc4..46b2647c5bd2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)  	dprintk("RPC:       allocating UNIX cred for uid %d gid %d\n",  			acred->uid, acred->gid); -	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) +	if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))  		return ERR_PTR(-ENOMEM);  	rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307556ec..76739e928d0d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@  #include <linux/module.h>  #include <linux/types.h> +#include <linux/kallsyms.h>  #include <linux/mm.h>  #include <linux/slab.h>  #include <linux/smp_lock.h> @@ -58,7 +59,6 @@ static void	call_start(struct rpc_task *task);  static void	call_reserve(struct rpc_task *task);  static void	call_reserveresult(struct rpc_task *task);  static void	call_allocate(struct rpc_task *task); -static void	call_encode(struct rpc_task *task);  static void	call_decode(struct rpc_task *task);  static void	call_bind(struct rpc_task *task);  static void	call_bind_status(struct rpc_task *task); @@ -70,9 +70,9 @@ static void	call_refreshresult(struct rpc_task *task);  static void	call_timeout(struct rpc_task *task);  static void	call_connect(struct rpc_task *task);  static void	call_connect_status(struct rpc_task *task); -static __be32 *	call_header(struct rpc_task *task); -static __be32 *	call_verify(struct rpc_task *task); +static __be32	*rpc_encode_header(struct rpc_task *task); +static __be32	*rpc_verify_header(struct rpc_task *task);  static int	rpc_ping(struct rpc_clnt *clnt, int flags);  static void rpc_register_client(struct rpc_clnt *clnt) @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)  		clnt->cl_autobind = 1;  	if (args->flags & RPC_CLNT_CREATE_DISCRTRY)  		clnt->cl_discrtry = 1; +	if (!(args->flags & RPC_CLNT_CREATE_QUIET)) +		clnt->cl_chatty = 1;  	return clnt;  } @@ -690,6 +692,21 @@ rpc_restart_call(struct rpc_task *task)  }  EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ +	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + +	if (proc) { +		if (proc->p_name) +			return proc->p_name; +		else +			return "NULL"; +	} else +		return "no proc"; +} +#endif +  /*   * 0.  Initial state   * @@ -701,9 +718,9 @@ call_start(struct rpc_task *task)  {  	struct rpc_clnt	*clnt = task->tk_client; -	dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, +	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,  			clnt->cl_protname, clnt->cl_vers, -			task->tk_msg.rpc_proc->p_proc, +			rpc_proc_name(task),  			(RPC_IS_ASYNC(task) ? "async" : "sync"));  	/* Increment call count */ @@ -861,7 +878,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)   * 3.	Encode arguments of an RPC call   */  static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task)  {  	struct rpc_rqst	*req = task->tk_rqstp;  	kxdrproc_t	encode; @@ -876,23 +893,19 @@ call_encode(struct rpc_task *task)  			 (char *)req->rq_buffer + req->rq_callsize,  			 req->rq_rcvsize); -	/* Encode header and provided arguments */ -	encode = task->tk_msg.rpc_proc->p_encode; -	if (!(p = call_header(task))) { -		printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); +	p = rpc_encode_header(task); +	if (p == NULL) { +		printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");  		rpc_exit(task, -EIO);  		return;  	} + +	encode = task->tk_msg.rpc_proc->p_encode;  	if (encode == NULL)  		return;  	task->tk_status = rpcauth_wrap_req(task, encode, req, p,  			task->tk_msg.rpc_argp); -	if (task->tk_status == -ENOMEM) { -		/* XXX: Is this sane? */ -		rpc_delay(task, 3*HZ); -		task->tk_status = -EAGAIN; -	}  }  /* @@ -929,11 +942,9 @@ call_bind_status(struct rpc_task *task)  	}  	switch (task->tk_status) { -	case -EAGAIN: -		dprintk("RPC: %5u rpcbind waiting for another request " -				"to finish\n", task->tk_pid); -		/* avoid busy-waiting here -- could be a network outage. */ -		rpc_delay(task, 5*HZ); +	case -ENOMEM: +		dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); +		rpc_delay(task, HZ >> 2);  		goto retry_timeout;  	case -EACCES:  		dprintk("RPC: %5u remote rpcbind: RPC program/version " @@ -1046,10 +1057,16 @@ call_transmit(struct rpc_task *task)  	/* Encode here so that rpcsec_gss can use correct sequence number. */  	if (rpc_task_need_encode(task)) {  		BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); -		call_encode(task); +		rpc_xdr_encode(task);  		/* Did the encode result in an error condition? */ -		if (task->tk_status != 0) +		if (task->tk_status != 0) { +			/* Was the error nonfatal? */ +			if (task->tk_status == -EAGAIN) +				rpc_delay(task, HZ >> 4); +			else +				rpc_exit(task, task->tk_status);  			return; +		}  	}  	xprt_transmit(task);  	if (task->tk_status < 0) @@ -1132,7 +1149,8 @@ call_status(struct rpc_task *task)  		rpc_exit(task, status);  		break;  	default: -		printk("%s: RPC call returned error %d\n", +		if (clnt->cl_chatty) +			printk("%s: RPC call returned error %d\n",  			       clnt->cl_protname, -status);  		rpc_exit(task, status);  	} @@ -1157,7 +1175,8 @@ call_timeout(struct rpc_task *task)  	task->tk_timeouts++;  	if (RPC_IS_SOFT(task)) { -		printk(KERN_NOTICE "%s: server %s not responding, timed out\n", +		if (clnt->cl_chatty) +			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",  				clnt->cl_protname, clnt->cl_server);  		rpc_exit(task, -EIO);  		return; @@ -1165,7 +1184,8 @@ call_timeout(struct rpc_task *task)  	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {  		task->tk_flags |= RPC_CALL_MAJORSEEN; -		printk(KERN_NOTICE "%s: server %s not responding, still trying\n", +		if (clnt->cl_chatty) +			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",  			clnt->cl_protname, clnt->cl_server);  	}  	rpc_force_rebind(clnt); @@ -1196,8 +1216,9 @@ call_decode(struct rpc_task *task)  			task->tk_pid, task->tk_status);  	if (task->tk_flags & RPC_CALL_MAJORSEEN) { -		printk(KERN_NOTICE "%s: server %s OK\n", -			clnt->cl_protname, clnt->cl_server); +		if (clnt->cl_chatty) +			printk(KERN_NOTICE "%s: server %s OK\n", +				clnt->cl_protname, clnt->cl_server);  		task->tk_flags &= ~RPC_CALL_MAJORSEEN;  	} @@ -1224,8 +1245,7 @@ call_decode(struct rpc_task *task)  		goto out_retry;  	} -	/* Verify the RPC header */ -	p = call_verify(task); +	p = rpc_verify_header(task);  	if (IS_ERR(p)) {  		if (p == ERR_PTR(-EAGAIN))  			goto out_retry; @@ -1243,7 +1263,7 @@ call_decode(struct rpc_task *task)  	return;  out_retry:  	task->tk_status = 0; -	/* Note: call_verify() may have freed the RPC slot */ +	/* Note: rpc_verify_header() may have freed the RPC slot */  	if (task->tk_rqstp == req) {  		req->rq_received = req->rq_rcv_buf.len = 0;  		if (task->tk_client->cl_discrtry) @@ -1290,11 +1310,8 @@ call_refreshresult(struct rpc_task *task)  	return;  } -/* - * Call header serialization - */  static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task)  {  	struct rpc_clnt *clnt = task->tk_client;  	struct rpc_rqst	*req = task->tk_rqstp; @@ -1314,11 +1331,8 @@ call_header(struct rpc_task *task)  	return p;  } -/* - * Reply header verification - */  static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task)  {  	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];  	int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1392,7 +1406,7 @@ call_verify(struct rpc_task *task)  			task->tk_action = call_bind;  			goto out_retry;  		case RPC_AUTH_TOOWEAK: -			printk(KERN_NOTICE "call_verify: server %s requires stronger " +			printk(KERN_NOTICE "RPC: server %s requires stronger "  			       "authentication.\n", task->tk_client->cl_server);  			break;  		default: @@ -1431,10 +1445,10 @@ call_verify(struct rpc_task *task)  		error = -EPROTONOSUPPORT;  		goto out_err;  	case RPC_PROC_UNAVAIL: -		dprintk("RPC: %5u %s: proc %p unsupported by program %u, " +		dprintk("RPC: %5u %s: proc %s unsupported by program %u, "  				"version %u on server %s\n",  				task->tk_pid, __func__, -				task->tk_msg.rpc_proc, +				rpc_proc_name(task),  				task->tk_client->cl_prog,  				task->tk_client->cl_vers,  				task->tk_client->cl_server); @@ -1517,44 +1531,53 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int  EXPORT_SYMBOL_GPL(rpc_call_null);  #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ +	printk(KERN_INFO "-pid- flgs status -client- --rqstp- " +		"-timeout ---ops--\n"); +} + +static void rpc_show_task(const struct rpc_clnt *clnt, +			  const struct rpc_task *task) +{ +	const char *rpc_waitq = "none"; +	char *p, action[KSYM_SYMBOL_LEN]; + +	if (RPC_IS_QUEUED(task)) +		rpc_waitq = rpc_qname(task->tk_waitqueue); + +	/* map tk_action pointer to a function name; then trim off +	 * the "+0x0 [sunrpc]" */ +	sprint_symbol(action, (unsigned long)task->tk_action); +	p = strchr(action, '+'); +	if (p) +		*p = '\0'; + +	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", +		task->tk_pid, task->tk_flags, task->tk_status, +		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, +		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), +		action, rpc_waitq); +} +  void rpc_show_tasks(void)  {  	struct rpc_clnt *clnt; -	struct rpc_task *t; +	struct rpc_task *task; +	int header = 0;  	spin_lock(&rpc_client_lock); -	if (list_empty(&all_clients)) -		goto out; -	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " -		"-rpcwait -action- ---ops--\n");  	list_for_each_entry(clnt, &all_clients, cl_clients) { -		if (list_empty(&clnt->cl_tasks)) -			continue;  		spin_lock(&clnt->cl_lock); -		list_for_each_entry(t, &clnt->cl_tasks, tk_task) { -			const char *rpc_waitq = "none"; -			int proc; - -			if (t->tk_msg.rpc_proc) -				proc = t->tk_msg.rpc_proc->p_proc; -			else -				proc = -1; - -			if (RPC_IS_QUEUED(t)) -				rpc_waitq = rpc_qname(t->tk_waitqueue); - -			printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", -				t->tk_pid, proc, -				t->tk_flags, t->tk_status, -				t->tk_client, -				(t->tk_client ? t->tk_client->cl_prog : 0), -				t->tk_rqstp, t->tk_timeout, -				rpc_waitq, -				t->tk_action, t->tk_ops); +		list_for_each_entry(task, &clnt->cl_tasks, tk_task) { +			if (!header) { +				rpc_show_header(); +				header++; +			} +			rpc_show_task(clnt, task);  		}  		spin_unlock(&clnt->cl_lock);  	} -out:  	spin_unlock(&rpc_client_lock);  }  #endif diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e6fb21b19b86..24db2b4d12d3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,6 +32,10 @@  #define RPCBIND_PROGRAM		(100000u)  #define RPCBIND_PORT		(111u) +#define RPCBVERS_2		(2u) +#define RPCBVERS_3		(3u) +#define RPCBVERS_4		(4u) +  enum {  	RPCBPROC_NULL,  	RPCBPROC_SET, @@ -64,6 +68,7 @@ enum {  #define RPCB_MAXOWNERLEN	sizeof(RPCB_OWNER_STRING)  static void			rpcb_getport_done(struct rpc_task *, void *); +static void			rpcb_map_release(void *data);  static struct rpc_program	rpcb_program;  struct rpcbind_args { @@ -76,41 +81,73 @@ struct rpcbind_args {  	const char *		r_netid;  	const char *		r_addr;  	const char *		r_owner; + +	int			r_status;  };  static struct rpc_procinfo rpcb_procedures2[];  static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[];  struct rpcb_info { -	int			rpc_vers; +	u32			rpc_vers;  	struct rpc_procinfo *	rpc_proc;  };  static struct rpcb_info rpcb_next_version[];  static struct rpcb_info rpcb_next_version6[]; +static const struct rpc_call_ops rpcb_getport_ops = { +	.rpc_call_done		= rpcb_getport_done, +	.rpc_release		= rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ +	xprt_clear_binding(xprt); +	rpc_wake_up_status(&xprt->binding, status); +} +  static void rpcb_map_release(void *data)  {  	struct rpcbind_args *map = data; +	rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status);  	xprt_put(map->r_xprt);  	kfree(map);  } -static const struct rpc_call_ops rpcb_getport_ops = { -	.rpc_call_done		= rpcb_getport_done, -	.rpc_release		= rpcb_map_release, +static const struct sockaddr_in rpcb_inaddr_loopback = { +	.sin_family		= AF_INET, +	.sin_addr.s_addr	= htonl(INADDR_LOOPBACK), +	.sin_port		= htons(RPCBIND_PORT),  }; -static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +static const struct sockaddr_in6 rpcb_in6addr_loopback = { +	.sin6_family		= AF_INET6, +	.sin6_addr		= IN6ADDR_LOOPBACK_INIT, +	.sin6_port		= htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, +					  size_t addrlen, u32 version)  { -	xprt_clear_binding(xprt); -	rpc_wake_up_status(&xprt->binding, status); +	struct rpc_create_args args = { +		.protocol	= XPRT_TRANSPORT_UDP, +		.address	= addr, +		.addrsize	= addrlen, +		.servername	= "localhost", +		.program	= &rpcb_program, +		.version	= version, +		.authflavor	= RPC_AUTH_UNIX, +		.flags		= RPC_CLNT_CREATE_NOPING, +	}; + +	return rpc_create(&args);  }  static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, -				    size_t salen, int proto, u32 version, -				    int privileged) +				    size_t salen, int proto, u32 version)  {  	struct rpc_create_args args = {  		.protocol	= proto, @@ -120,7 +157,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,  		.program	= &rpcb_program,  		.version	= version,  		.authflavor	= RPC_AUTH_UNIX, -		.flags		= RPC_CLNT_CREATE_NOPING, +		.flags		= (RPC_CLNT_CREATE_NOPING | +					RPC_CLNT_CREATE_NONPRIVPORT),  	};  	switch (srvaddr->sa_family) { @@ -134,29 +172,72 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,  		return NULL;  	} -	if (!privileged) -		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;  	return rpc_create(&args);  } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, +			      u32 version, struct rpc_message *msg, +			      int *result) +{ +	struct rpc_clnt *rpcb_clnt; +	int error = 0; + +	*result = 0; + +	rpcb_clnt = rpcb_create_local(addr, addrlen, version); +	if (!IS_ERR(rpcb_clnt)) { +		error = rpc_call_sync(rpcb_clnt, msg, 0); +		rpc_shutdown_client(rpcb_clnt); +	} else +		error = PTR_ERR(rpcb_clnt); + +	if (error < 0) +		printk(KERN_WARNING "RPC: failed to contact local rpcbind " +				"server (errno %d).\n", -error); +	dprintk("RPC:       registration status %d/%d\n", error, *result); + +	return error; +} +  /**   * rpcb_register - set or unset a port registration with the local rpcbind svc   * @prog: RPC program number to bind   * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register   * @port: port value to register - * @okay: result code + * @okay: OUT: result code + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon.  RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero.  This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received.  The rpcbind daemon's + * boolean result code is stored in *okay. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout.   * - * port == 0 means unregister, port != 0 means register. + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon.   * - * This routine supports only rpcbind version 2. + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address.  If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 + * addresses).   */  int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)  { -	struct sockaddr_in sin = { -		.sin_family		= AF_INET, -		.sin_addr.s_addr	= htonl(INADDR_LOOPBACK), -	};  	struct rpcbind_args map = {  		.r_prog		= prog,  		.r_vers		= vers, @@ -164,32 +245,159 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)  		.r_port		= port,  	};  	struct rpc_message msg = { -		.rpc_proc	= &rpcb_procedures2[port ? -					RPCBPROC_SET : RPCBPROC_UNSET],  		.rpc_argp	= &map,  		.rpc_resp	= okay,  	}; -	struct rpc_clnt *rpcb_clnt; -	int error = 0;  	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "  			"rpcbind\n", (port ? "" : "un"),  			prog, vers, prot, port); -	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, -				sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); -	if (IS_ERR(rpcb_clnt)) -		return PTR_ERR(rpcb_clnt); +	msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; +	if (port) +		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; -	error = rpc_call_sync(rpcb_clnt, &msg, 0); +	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, +					sizeof(rpcb_inaddr_loopback), +					RPCBVERS_2, &msg, okay); +} -	rpc_shutdown_client(rpcb_clnt); -	if (error < 0) -		printk(KERN_WARNING "RPC: failed to contact local rpcbind " -				"server (errno %d).\n", -error); -	dprintk("RPC:       registration status %d/%d\n", error, *okay); +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, +				struct rpc_message *msg) +{ +	struct rpcbind_args *map = msg->rpc_argp; +	unsigned short port = ntohs(address_to_register->sin_port); +	char buf[32]; + +	/* Construct AF_INET universal address */ +	snprintf(buf, sizeof(buf), +			NIPQUAD_FMT".%u.%u", +			NIPQUAD(address_to_register->sin_addr.s_addr), +			port >> 8, port & 0xff); +	map->r_addr = buf; + +	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with " +		"local rpcbind\n", (port ? "" : "un"), +			map->r_prog, map->r_vers, +			map->r_addr, map->r_netid); + +	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; +	if (port) +		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + +	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, +					sizeof(rpcb_inaddr_loopback), +					RPCBVERS_4, msg, msg->rpc_resp); +} -	return error; +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, +				struct rpc_message *msg) +{ +	struct rpcbind_args *map = msg->rpc_argp; +	unsigned short port = ntohs(address_to_register->sin6_port); +	char buf[64]; + +	/* Construct AF_INET6 universal address */ +	snprintf(buf, sizeof(buf), +			NIP6_FMT".%u.%u", +			NIP6(address_to_register->sin6_addr), +			port >> 8, port & 0xff); +	map->r_addr = buf; + +	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with " +		"local rpcbind\n", (port ? "" : "un"), +			map->r_prog, map->r_vers, +			map->r_addr, map->r_netid); + +	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; +	if (port) +		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + +	return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, +					sizeof(rpcb_in6addr_loopback), +					RPCBVERS_4, msg, msg->rpc_resp); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * @result: result code from rpcbind RPC call + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon.  RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero.  Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received.  The rpcbind daemon's + * result code is stored in *result. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon.  The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered.  The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY.  The former advertises an RPC + * service on any IPv4 address, but not on IPv6.  The latter + * advertises the service on all IPv4 and IPv6 addresses. + */ +int rpcb_v4_register(const u32 program, const u32 version, +		     const struct sockaddr *address, const char *netid, +		     int *result) +{ +	struct rpcbind_args map = { +		.r_prog		= program, +		.r_vers		= version, +		.r_netid	= netid, +		.r_owner	= RPCB_OWNER_STRING, +	}; +	struct rpc_message msg = { +		.rpc_argp	= &map, +		.rpc_resp	= result, +	}; + +	*result = 0; + +	switch (address->sa_family) { +	case AF_INET: +		return rpcb_register_netid4((struct sockaddr_in *)address, +					    &msg); +	case AF_INET6: +		return rpcb_register_netid6((struct sockaddr_in6 *)address, +					    &msg); +	} + +	return -EAFNOSUPPORT;  }  /** @@ -227,7 +435,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)  		__func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);  	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, -				sizeof(*sin), prot, 2, 0); +				sizeof(*sin), prot, RPCBVERS_2);  	if (IS_ERR(rpcb_clnt))  		return PTR_ERR(rpcb_clnt); @@ -289,17 +497,16 @@ void rpcb_getport_async(struct rpc_task *task)  	/* Autobind on cloned rpc clients is discouraged */  	BUG_ON(clnt->cl_parent != clnt); +	/* Put self on the wait queue to ensure we get notified if +	 * some other task is already attempting to bind the port */ +	rpc_sleep_on(&xprt->binding, task, NULL); +  	if (xprt_test_and_set_binding(xprt)) { -		status = -EAGAIN;	/* tell caller to check again */  		dprintk("RPC: %5u %s: waiting for another binder\n",  			task->tk_pid, __func__); -		goto bailout_nowake; +		return;  	} -	/* Put self on queue before sending rpcbind request, in case -	 * rpcb_getport_done completes before we return from rpc_run_task */ -	rpc_sleep_on(&xprt->binding, task, NULL); -  	/* Someone else may have bound if we slept */  	if (xprt_bound(xprt)) {  		status = 0; @@ -338,7 +545,7 @@ void rpcb_getport_async(struct rpc_task *task)  		task->tk_pid, __func__, bind_version);  	rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, -				bind_version, 0); +				bind_version);  	if (IS_ERR(rpcb_clnt)) {  		status = PTR_ERR(rpcb_clnt);  		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -361,15 +568,15 @@ void rpcb_getport_async(struct rpc_task *task)  	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);  	map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);  	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */ +	map->r_status = -EIO;  	child = rpcb_call_async(rpcb_clnt, map, proc);  	rpc_release_client(rpcb_clnt);  	if (IS_ERR(child)) { -		status = -EIO;  		/* rpcb_map_release() has freed the arguments */  		dprintk("RPC: %5u %s: rpc_run_task failed\n",  			task->tk_pid, __func__); -		goto bailout_nofree; +		return;  	}  	rpc_put_task(child); @@ -378,7 +585,6 @@ void rpcb_getport_async(struct rpc_task *task)  bailout_nofree:  	rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake:  	task->tk_status = status;  }  EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -417,9 +623,13 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)  	dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",  			child->tk_pid, status, map->r_port); -	rpcb_wake_rpcbind_waiters(xprt, status); +	map->r_status = status;  } +/* + * XDR functions for rpcbind + */ +  static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,  			       struct rpcbind_args *rpcb)  { @@ -438,7 +648,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,  			       unsigned short *portp)  {  	*portp = (unsigned short) ntohl(*p++); -	dprintk("RPC:      rpcb_decode_getport result %u\n", +	dprintk("RPC:       rpcb_decode_getport result %u\n",  			*portp);  	return 0;  } @@ -447,8 +657,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,  			   unsigned int *boolp)  {  	*boolp = (unsigned int) ntohl(*p++); -	dprintk("RPC:      rpcb_decode_set result %u\n", -			*boolp); +	dprintk("RPC:       rpcb_decode_set: call %s\n", +			(*boolp ? "succeeded" : "failed"));  	return 0;  } @@ -571,52 +781,60 @@ out_err:  static struct rpc_procinfo rpcb_procedures2[] = {  	PROC(SET,		mapping,	set),  	PROC(UNSET,		mapping,	set), -	PROC(GETADDR,		mapping,	getport), +	PROC(GETPORT,		mapping,	getport),  };  static struct rpc_procinfo rpcb_procedures3[] = { -	PROC(SET,		mapping,	set), -	PROC(UNSET,		mapping,	set), +	PROC(SET,		getaddr,	set), +	PROC(UNSET,		getaddr,	set),  	PROC(GETADDR,		getaddr,	getaddr),  };  static struct rpc_procinfo rpcb_procedures4[] = { -	PROC(SET,		mapping,	set), -	PROC(UNSET,		mapping,	set), +	PROC(SET,		getaddr,	set), +	PROC(UNSET,		getaddr,	set), +	PROC(GETADDR,		getaddr,	getaddr),  	PROC(GETVERSADDR,	getaddr,	getaddr),  };  static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 -	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, -	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif -	{ 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, -	{ 0, NULL }, +	{ +		.rpc_vers	= RPCBVERS_2, +		.rpc_proc	= &rpcb_procedures2[RPCBPROC_GETPORT], +	}, +	{ +		.rpc_proc	= NULL, +	},  };  static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 -	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, -	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif -	{ 0, NULL }, +	{ +		.rpc_vers	= RPCBVERS_4, +		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETADDR], +	}, +	{ +		.rpc_vers	= RPCBVERS_3, +		.rpc_proc	= &rpcb_procedures3[RPCBPROC_GETADDR], +	}, +	{ +		.rpc_proc	= NULL, +	},  };  static struct rpc_version rpcb_version2 = { -	.number		= 2, +	.number		= RPCBVERS_2,  	.nrprocs	= RPCB_HIGHPROC_2,  	.procs		= rpcb_procedures2  };  static struct rpc_version rpcb_version3 = { -	.number		= 3, +	.number		= RPCBVERS_3,  	.nrprocs	= RPCB_HIGHPROC_3,  	.procs		= rpcb_procedures3  };  static struct rpc_version rpcb_version4 = { -	.number		= 4, +	.number		= RPCBVERS_4,  	.nrprocs	= RPCB_HIGHPROC_4,  	.procs		= rpcb_procedures4  }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf94baf..6288af05c20f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -626,19 +626,15 @@ static void __rpc_execute(struct rpc_task *task)  		/*  		 * Execute any pending callback.  		 */ -		if (RPC_DO_CALLBACK(task)) { -			/* Define a callback save pointer */ +		if (task->tk_callback) {  			void (*save_callback)(struct rpc_task *);  			/* -			 * If a callback exists, save it, reset it, -			 * call it. -			 * The save is needed to stop from resetting -			 * another callback set within the callback handler -			 * - Dave +			 * We set tk_callback to NULL before calling it, +			 * in case it sets the tk_callback field itself:  			 */ -			save_callback=task->tk_callback; -			task->tk_callback=NULL; +			save_callback = task->tk_callback; +			task->tk_callback = NULL;  			save_callback(task);  		} diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7ba0b3..99a52aabe332 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task)  {  	struct rpc_xprt	*xprt = task->tk_xprt; -	if (task->tk_status >= 0) { +	if (task->tk_status == 0) {  		xprt->stat.connect_count++;  		xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;  		dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task)  	}  	switch (task->tk_status) { -	case -ECONNREFUSED: -	case -ECONNRESET: -		dprintk("RPC: %5u xprt_connect_status: server %s refused " -				"connection\n", task->tk_pid, -				task->tk_client->cl_server); -		break;  	case -ENOTCONN:  		dprintk("RPC: %5u xprt_connect_status: connection broken\n",  				task->tk_pid); @@ -878,6 +872,7 @@ void xprt_transmit(struct rpc_task *task)  		return;  	req->rq_connect_cookie = xprt->connect_cookie; +	req->rq_xtime = jiffies;  	status = xprt->ops->send_request(task);  	if (status == 0) {  		dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981ab516..4486c59c3aca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task)  				req->rq_svec->iov_base,  				req->rq_svec->iov_len); -	req->rq_xtime = jiffies;  	status = xs_sendpages(transport->sock,  			      xs_addr(xprt),  			      xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task)  	 * to cope with writespace callbacks arriving _after_ we have  	 * called sendmsg(). */  	while (1) { -		req->rq_xtime = jiffies;  		status = xs_sendpages(transport->sock,  					NULL, 0, xdr, req->rq_bytes_sent); |