99 files changed, 784 insertions, 553 deletions
| diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 277d1e810670..c0bd5677271b 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -676,6 +676,29 @@ FS-Cache provides some utilities that a cache backend may make use of:       as possible. + (*) Indicate that a stale object was found and discarded: + +	void fscache_object_retrying_stale(struct fscache_object *object); + +     This is called to indicate that the lookup procedure found an object in +     the cache that the netfs decided was stale.  The object has been +     discarded from the cache and the lookup will be performed again. + + + (*) Indicate that the caching backend killed an object: + +	void fscache_object_mark_killed(struct fscache_object *object, +					enum fscache_why_object_killed why); + +     This is called to indicate that the cache backend preemptively killed an +     object.  The why parameter should be set to indicate the reason: + +	FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding. +	FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space +	FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished. +	FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space. + +   (*) Get and release references on a retrieval record:  	void fscache_get_retrieval(struct fscache_retrieval *op); diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 770267af5b3e..50f0a5757f48 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -284,8 +284,9 @@ proc files.  		enq=N	Number of times async ops queued for processing  		can=N	Number of async ops cancelled  		rej=N	Number of async ops rejected due to object lookup/create failure +		ini=N	Number of async ops initialised  		dfr=N	Number of async ops queued for deferred release -		rel=N	Number of async ops released +		rel=N	Number of async ops released (should equal ini=N when idle)  		gc=N	Number of deferred-release async ops garbage collected  	CacheOp	alo=N	Number of in-progress alloc_object() cache ops  		luo=N	Number of in-progress lookup_object() cache ops @@ -303,6 +304,10 @@ proc files.  		wrp=N	Number of in-progress write_page() cache ops  		ucp=N	Number of in-progress uncache_page() cache ops  		dsp=N	Number of in-progress dissociate_pages() cache ops +	CacheEv	nsp=N	Number of object lookups/creations rejected due to lack of space +		stl=N	Number of stale objects deleted +		rtr=N	Number of objects retired when relinquished +		cul=N	Number of objects culled   (*) /proc/fs/fscache/histogram diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index baf41118660d..7af2851d667c 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -18,8 +18,10 @@ Usage  -----  If you have a block device which supports DAX, you can make a filesystem -on it as usual.  When mounting it, use the -o dax option manually -or add 'dax' to the options in /etc/fstab. +on it as usual.  The DAX code currently only supports files with a block +size equal to your kernel's PAGE_SIZE, so you may need to specify a block +size when creating the filesystem.  When mounting it, use the "-o dax" +option on the command line or add 'dax' to the options in /etc/fstab.  
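As an illustration of the mount usage described in the dax.txt hunk above, here is a minimal user-space sketch (not part of the patch): it calls mount(2) with "dax" as the filesystem option string. The device path, mount point and choice of ext4 are assumptions for the example; the filesystem must have been created with a block size equal to the kernel's PAGE_SIZE (e.g. via mkfs's -b option) for DAX to take effect.

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Equivalent to "mount -o dax /dev/pmem0 /mnt/dax"; both paths
		 * are hypothetical.  The "dax" string is passed as the
		 * filesystem-specific data argument of mount(2). */
		if (mount("/dev/pmem0", "/mnt/dax", "ext4", 0, "dax") != 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}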
Implementation Tips for Block Driver Writers diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 68f1c9106573..f24d1b833957 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -500,3 +500,7 @@ in your dentry operations instead.  	dentry,  it does not get nameidata at all and it gets called only when cookie  	is non-NULL.  Note that link body isn't available anymore, so if you need it,  	store it as cookie. +-- +[mandatory] +	__fd_install() & fd_install() can now sleep. Callers should not +	hold a spinlock	or other resources that do not allow a schedule. diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e0cf99893212..807f7d61d7a7 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -71,15 +71,12 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)  	mmput(mm);  	if (exe_file) { -		path = exe_file->f_path; -		path_get(&exe_file->f_path); +		path_nm = file_path(exe_file, buf, 255);  		fput(exe_file); -		path_nm = d_path(&path, buf, 255); -		path_put(&path);  	}  done: -	pr_info("Path: %s\n", path_nm); +	pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");  }  static void show_faulting_vma(unsigned long address, char *buf) @@ -103,8 +100,7 @@ static void show_faulting_vma(unsigned long address, char *buf)  	if (vma && (vma->vm_start <= address)) {  		struct file *file = vma->vm_file;  		if (file) { -			struct path *path = &file->f_path; -			nm = d_path(path, buf, PAGE_SIZE - 1); +			nm = file_path(file, buf, PAGE_SIZE - 1);  			inode = file_inode(vma->vm_file);  			dev = inode->i_sb->s_dev;  			ino = inode->i_ino; diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index c36efa0c7163..719dd796c12c 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -136,7 +136,7 @@ void decode_address(char *buf, unsigned long address)  				struct file *file = vma->vm_file;  				if (file) { -					char *d_name = d_path(&file->f_path, _tmpbuf, +					char *d_name = file_path(file, _tmpbuf,  						      sizeof(_tmpbuf));  					if (!IS_ERR(d_name))  						name = d_name; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1ba6307be4db..11634fa7ab3c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir)  	mutex_lock(&d_inode(dir)->i_mutex);  	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {  		spin_lock(&dentry->d_lock); -		if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { +		if (simple_positive(dentry)) {  			dget_dlock(dentry);  			__d_drop(dentry);  			spin_unlock(&dentry->d_lock); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 2eeb0a0f506d..b2e5902bd8f4 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -62,18 +62,13 @@ static void hypfs_add_dentry(struct dentry *dentry)  	hypfs_last_dentry = dentry;  } -static inline int hypfs_positive(struct dentry *dentry) -{ -	return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  static void hypfs_remove(struct dentry *dentry)  {  	struct dentry *parent;  	parent = dentry->d_parent;  	mutex_lock(&d_inode(parent)->i_mutex); -	if (hypfs_positive(dentry)) { +	if (simple_positive(dentry)) {  		if (d_is_dir(dentry))  			simple_rmdir(d_inode(parent), dentry);  		else diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c 
index 35d34635e4f1..402b9c85a894 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -332,7 +332,7 @@ static void describe_addr(struct KBacktraceIterator *kbt,  	}  	if (vma->vm_file) { -		p = d_path(&vma->vm_file->f_path, buf, bufsize); +		p = file_path(vma->vm_file, buf, bufsize);  		if (IS_ERR(p))  			p = "?";  		name = kbasename(p); diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index f7ddae3725a4..6225cc998db1 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -56,7 +56,7 @@ static int notify_exec(struct mm_struct *mm)  	if (exe_file == NULL)  		goto done_free; -	path = d_path(&exe_file->f_path, buf, PAGE_SIZE); +	path = file_path(exe_file, buf, PAGE_SIZE);  	if (IS_ERR(path))  		goto done_put; diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index a6ee3d750c30..6b88a35fb048 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -419,14 +419,6 @@ static int in_flight_summary_show(struct seq_file *m, void *pos)  	return 0;  } -/* simple_positive(file->f_path.dentry) respectively debugfs_positive(), - * but neither is "reachable" from here. - * So we have our own inline version of it above.  :-( */ -static inline int debugfs_positive(struct dentry *dentry) -{ -        return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  /* make sure at *open* time that the respective object won't go away. */  static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),  		                void *data, struct kref *kref, @@ -444,7 +436,7 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo  	/* serialize with d_delete() */  	mutex_lock(&d_inode(parent)->i_mutex);  	/* Make sure the object is still alive */ -	if (debugfs_positive(file->f_path.dentry) +	if (simple_positive(file->f_path.dentry)  	&& kref_get_unless_zero(kref))  		ret = 0;  	mutex_unlock(&d_inode(parent)->i_mutex); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 40580dc7f41c..f7a4c9d7f721 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -588,7 +588,7 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)  	spin_lock_irq(&lo->lo_lock);  	if (lo->lo_backing_file) -		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); +		p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1);  	spin_unlock_irq(&lo->lo_lock);  	if (IS_ERR_OR_NULL(p)) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 1ca8e32a9592..25422a3a7238 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,7 +277,7 @@ static int remove_file(struct dentry *parent, char *name)  	}  	spin_lock(&tmp->d_lock); -	if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { +	if (simple_positive(tmp)) {  		dget_dlock(tmp);  		__d_drop(tmp);  		spin_unlock(&tmp->d_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index bdd5d3857203..13ef22bd9459 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name)  	}  	spin_lock(&tmp->d_lock); -	if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { +	if (simple_positive(tmp)) {  		__d_drop(tmp);  		spin_unlock(&tmp->d_lock);  		simple_unlink(d_inode(parent), tmp); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 135a0907e9de..ed2346ddf4c9 100644 --- a/drivers/md/bitmap.c +++ 
b/drivers/md/bitmap.c @@ -839,7 +839,7 @@ static void bitmap_file_kick(struct bitmap *bitmap)  		if (bitmap->storage.file) {  			path = kmalloc(PAGE_SIZE, GFP_KERNEL);  			if (path) -				ptr = d_path(&bitmap->storage.file->f_path, +				ptr = file_path(bitmap->storage.file,  					     path, PAGE_SIZE);  			printk(KERN_ALERT @@ -1927,7 +1927,7 @@ void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)  		   chunk_kb ? "KB" : "B");  	if (bitmap->storage.file) {  		seq_printf(seq, ", file: "); -		seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); +		seq_file_path(seq, bitmap->storage.file, " \t\n");  	}  	seq_printf(seq, "\n"); diff --git a/drivers/md/md.c b/drivers/md/md.c index df92d30ca054..d429c30cd514 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5766,7 +5766,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)  	/* bitmap disabled, zero the first byte and copy out */  	if (!mddev->bitmap_info.file)  		file->pathname[0] = '\0'; -	else if ((ptr = d_path(&mddev->bitmap_info.file->f_path, +	else if ((ptr = file_path(mddev->bitmap_info.file,  			       file->pathname, sizeof(file->pathname))),  		 IS_ERR(ptr))  		err = PTR_ERR(ptr); diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3cc109f3c9c8..d2259c663996 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2936,7 +2936,7 @@ int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg,  	if (fsg_lun_is_open(lun)) {  		p = "(error)";  		if (pathbuf) { -			p = d_path(&lun->filp->f_path, pathbuf, PATH_MAX); +			p = file_path(lun->filp, pathbuf, PATH_MAX);  			if (IS_ERR(p))  				p = "(error)";  		} diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index 648f9e489b39..d62683017cf3 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -341,7 +341,7 @@ ssize_t fsg_show_file(struct fsg_lun *curlun, struct rw_semaphore *filesem,  	down_read(filesem);  	if (fsg_lun_is_open(curlun)) {	/* Get the complete pathname */ -		p = d_path(&curlun->filp->f_path, buf, PAGE_SIZE - 1); +		p = file_path(curlun->filp, buf, PAGE_SIZE - 1);  		if (IS_ERR(p))  			rc = PTR_ERR(p);  		else { diff --git a/fs/affs/affs.h b/fs/affs/affs.h index cffe8370fb44..c69a87eaf57d 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -64,7 +64,7 @@ struct affs_inode_info {  /* short cut to get to the affs specific inode data */  static inline struct affs_inode_info *AFFS_I(struct inode *inode)  { -	return list_entry(inode, struct affs_inode_info, vfs_inode); +	return container_of(inode, struct affs_inode_info, vfs_inode);  }  /* diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5b700ef1e59d..c37149b929be 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -238,11 +238,6 @@ static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)  	return d_inode(sbi->sb->s_root)->i_ino;  } -static inline int simple_positive(struct dentry *dentry) -{ -	return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  static inline void __autofs4_add_expiring(struct dentry *dentry)  {  	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/befs/befs.h b/fs/befs/befs.h index 1fead8d56a98..35d19e8731e3 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -112,7 +112,7 @@ BEFS_SB(const struct super_block *super)  static inline struct befs_inode_info *  BEFS_I(const 
struct inode *inode)  { -	return list_entry(inode, struct befs_inode_info, vfs_inode); +	return container_of(inode, struct befs_inode_info, vfs_inode);  }  static inline befs_blocknr_t diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index cd46e4158830..6b659967898e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1530,7 +1530,7 @@ static int fill_files_note(struct memelfnote *note)  		file = vma->vm_file;  		if (!file)  			continue; -		filename = d_path(&file->f_path, name_curpos, remaining); +		filename = file_path(file, name_curpos, remaining);  		if (IS_ERR(filename)) {  			if (PTR_ERR(filename) == -ENAMETOOLONG) {  				vfree(data); @@ -1540,7 +1540,7 @@ static int fill_files_note(struct memelfnote *note)  			continue;  		} -		/* d_path() fills at the end, move name down */ +		/* file_path() fills at the end, move name down */  		/* n = strlen(filename) + 1: */  		n = (name_curpos + remaining) - filename;  		remaining = filename - name_curpos; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4fe10f93db8a..198243717da5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -152,6 +152,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)  	struct file *file = iocb->ki_filp;  	struct inode *inode = file->f_mapping->host; +	if (IS_DAX(inode)) +		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, +				NULL, DIO_SKIP_DIO_COUNT);  	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,  				    blkdev_get_block, NULL, NULL,  				    DIO_SKIP_DIO_COUNT); @@ -443,6 +446,12 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,  	long avail;  	const struct block_device_operations *ops = bdev->bd_disk->fops; +	/* +	 * The device driver is allowed to sleep, in order to make the +	 * memory directly accessible. +	 */ +	might_sleep(); +  	if (size < 0)  		return size;  	if (!ops->direct_access) @@ -1170,6 +1179,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)  		bdev->bd_disk = disk;  		bdev->bd_queue = disk->queue;  		bdev->bd_contains = bdev; +		bdev->bd_inode->i_flags = disk->fops->direct_access ? 
S_DAX : 0;  		if (!partno) {  			ret = -ENXIO;  			bdev->bd_part = disk_get_part(disk, partno); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 795d754327a7..b823fac91c92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	}  	current->backing_dev_info = inode_to_bdi(inode); -	err = file_remove_suid(file); +	err = file_remove_privs(file);  	if (err) {  		mutex_unlock(&inode->i_mutex);  		goto out; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 8c52472d2efa..aecd0859eacb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -43,7 +43,6 @@ struct cachefiles_object {  	loff_t				i_size;		/* object size */  	unsigned long			flags;  #define CACHEFILES_OBJECT_ACTIVE	0		/* T if marked active */ -#define CACHEFILES_OBJECT_BURIED	1		/* T if preemptively buried */  	atomic_t			usage;		/* object usage count */  	uint8_t				type;		/* object type */  	uint8_t				new;		/* T if object new */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ab857ab9f40d..fc1056f5c96a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -97,7 +97,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,   *   call vfs_unlink(), vfs_rmdir() or vfs_rename()   */  static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, -					  struct dentry *dentry) +					  struct dentry *dentry, +					  enum fscache_why_object_killed why)  {  	struct cachefiles_object *object;  	struct rb_node *p; @@ -132,8 +133,9 @@ found_dentry:  		pr_err("\n");  		pr_err("Error: Can't preemptively bury live object\n");  		cachefiles_printk_object(object, NULL); -	} else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { -		pr_err("Error: Object already preemptively buried\n"); +	} else { +		if (why != FSCACHE_OBJECT_IS_STALE) +			fscache_object_mark_killed(&object->fscache, why);  	}  	write_unlock(&cache->active_lock); @@ -265,7 +267,8 @@ requeue:  static int cachefiles_bury_object(struct cachefiles_cache *cache,  				  struct dentry *dir,  				  struct dentry *rep, -				  bool preemptive) +				  bool preemptive, +				  enum fscache_why_object_killed why)  {  	struct dentry *grave, *trap;  	struct path path, path_to_graveyard; @@ -289,7 +292,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,  			ret = vfs_unlink(d_inode(dir), rep, NULL);  			if (preemptive) -				cachefiles_mark_object_buried(cache, rep); +				cachefiles_mark_object_buried(cache, rep, why);  		}  		mutex_unlock(&d_inode(dir)->i_mutex); @@ -394,7 +397,7 @@ try_again:  					    "Rename failed with error %d", ret);  		if (preemptive) -			cachefiles_mark_object_buried(cache, rep); +			cachefiles_mark_object_buried(cache, rep, why);  	}  	unlock_rename(cache->graveyard, dir); @@ -422,7 +425,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,  	mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); -	if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { +	if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {  		/* object allocation for the same key preemptively deleted this  		 * object's file so that it could create its own file */  		_debug("object preemptively buried"); @@ -433,7 +436,8 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,  		 * may have been renamed */  		if (dir == object->dentry->d_parent) {  			ret = cachefiles_bury_object(cache, dir, -						     object->dentry, false); +						     
object->dentry, false, +						     FSCACHE_OBJECT_WAS_RETIRED);  		} else {  			/* it got moved, presumably by cachefilesd culling it,  			 * so it's no longer in the key path and we can ignore @@ -522,7 +526,7 @@ lookup_again:  		if (d_is_negative(next)) {  			ret = cachefiles_has_space(cache, 1, 0);  			if (ret < 0) -				goto create_error; +				goto no_space_error;  			path.dentry = dir;  			ret = security_path_mkdir(&path, next, 0); @@ -551,7 +555,7 @@ lookup_again:  		if (d_is_negative(next)) {  			ret = cachefiles_has_space(cache, 1, 0);  			if (ret < 0) -				goto create_error; +				goto no_space_error;  			path.dentry = dir;  			ret = security_path_mknod(&path, next, S_IFREG, 0); @@ -602,7 +606,8 @@ lookup_again:  			 * mutex) */  			object->dentry = NULL; -			ret = cachefiles_bury_object(cache, dir, next, true); +			ret = cachefiles_bury_object(cache, dir, next, true, +						     FSCACHE_OBJECT_IS_STALE);  			dput(next);  			next = NULL; @@ -610,6 +615,7 @@ lookup_again:  				goto delete_error;  			_debug("redo lookup"); +			fscache_object_retrying_stale(&object->fscache);  			goto lookup_again;  		}  	} @@ -662,6 +668,8 @@ lookup_again:  	_leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino);  	return 0; +no_space_error: +	fscache_object_mark_killed(&object->fscache, FSCACHE_OBJECT_NO_SPACE);  create_error:  	_debug("create error %d", ret);  	if (ret == -EIO) @@ -927,7 +935,8 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,  	/*  actually remove the victim (drops the dir mutex) */  	_debug("bury"); -	ret = cachefiles_bury_object(cache, dir, victim, false); +	ret = cachefiles_bury_object(cache, dir, victim, false, +				     FSCACHE_OBJECT_WAS_CULLED);  	if (ret < 0)  		goto error; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index faf92095e105..8b79d87eaf46 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -962,7 +962,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)  	pos = iocb->ki_pos;  	count = iov_iter_count(from); -	err = file_remove_suid(file); +	err = file_remove_privs(file);  	if (err)  		goto out; diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index d6f7a76a1f5b..f829fe963f5b 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -79,7 +79,7 @@ void coda_sysctl_clean(void);  static inline struct coda_inode_info *ITOC(struct inode *inode)  { -	return list_entry(inode, struct coda_inode_info, vfs_inode); +	return container_of(inode, struct coda_inode_info, vfs_inode);  }  static __inline__ struct CodaFid *coda_i2f(struct inode *inode) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 8d89f5fd0331..eae87575e681 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -236,7 +236,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)  	if (dentry) {  		spin_lock(&dentry->d_lock); -		if (!d_unhashed(dentry) && d_really_is_positive(dentry)) { +		if (simple_positive(dentry)) {  			dget_dlock(dentry);  			__d_drop(dentry);  			spin_unlock(&dentry->d_lock); diff --git a/fs/coredump.c b/fs/coredump.c index e52e0064feac..c5ecde6f3eed 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -140,7 +140,7 @@ static int cn_print_exe_file(struct core_name *cn)  		goto put_exe_file;  	} -	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); +	path = file_path(exe_file, pathbuf, PATH_MAX);  	if (IS_ERR(path)) {  		ret = PTR_ERR(path);  		goto free_buf; @@ -155,7 +155,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,  		}  		if (iov_iter_rw(iter) == 
WRITE) -			len = copy_from_iter(addr, max - pos, iter); +			len = copy_from_iter_nocache(addr, max - pos, iter);  		else if (!hole)  			len = copy_to_iter(addr, max - pos, iter);  		else @@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,  	}  	/* Protects against truncate */ -	inode_dio_begin(inode); +	if (!(flags & DIO_SKIP_DIO_COUNT)) +		inode_dio_begin(inode);  	retval = dax_io(inode, iter, pos, end, get_block, &bh); @@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,  	if ((retval > 0) && end_io)  		end_io(iocb, pos, retval, bh.b_private); -	inode_dio_end(inode); +	if (!(flags & DIO_SKIP_DIO_COUNT)) +		inode_dio_end(inode);   out:  	return retval;  } diff --git a/fs/dcache.c b/fs/dcache.c index 910968b4b6bf..7a3f3e5f9cea 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1673,7 +1673,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)  				DCACHE_OP_COMPARE	|  				DCACHE_OP_REVALIDATE	|  				DCACHE_OP_WEAK_REVALIDATE	| -				DCACHE_OP_DELETE )); +				DCACHE_OP_DELETE	| +				DCACHE_OP_SELECT_INODE));  	dentry->d_op = op;  	if (!op)  		return; @@ -1689,6 +1690,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)  		dentry->d_flags |= DCACHE_OP_DELETE;  	if (op->d_prune)  		dentry->d_flags |= DCACHE_OP_PRUNE; +	if (op->d_select_inode) +		dentry->d_flags |= DCACHE_OP_SELECT_INODE;  }  EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d6d1cf004123..c711be8d6a3c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -44,11 +44,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb)  	return inode;  } -static inline int debugfs_positive(struct dentry *dentry) -{ -	return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  struct debugfs_mount_opts {  	kuid_t uid;  	kgid_t gid; @@ -522,7 +517,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)  {  	int ret = 0; -	if (debugfs_positive(dentry)) { +	if (simple_positive(dentry)) {  		dget(dentry);  		if (d_is_dir(dentry))  			ret = simple_rmdir(d_inode(parent), dentry); @@ -602,7 +597,7 @@ void debugfs_remove_recursive(struct dentry *dentry)  	 */  	spin_lock(&parent->d_lock);  	list_for_each_entry(child, &parent->d_subdirs, d_child) { -		if (!debugfs_positive(child)) +		if (!simple_positive(child))  			continue;  		/* perhaps simple_empty(child) makes more sense */ @@ -623,7 +618,7 @@ void debugfs_remove_recursive(struct dentry *dentry)  		 * from d_subdirs. When releasing the parent->d_lock we can  		 * no longer trust that the next pointer is valid.  		 * Restart the loop. We'll skip this one with the -		 * debugfs_positive() check. +		 * simple_positive() check.  		 
*/  		goto loop;  	} diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 4deb0b05b011..e5bb2abf77f9 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -44,12 +44,6 @@ static inline void exofs_put_page(struct page *page)  	page_cache_release(page);  } -/* Accesses dir's inode->i_size must be called under inode lock */ -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} -  static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)  {  	loff_t last_byte = inode->i_size; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 796b491e6978..0c6638b40f21 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -70,11 +70,6 @@ static inline void ext2_put_page(struct page *page)  	page_cache_release(page);  } -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  /*   * Return the offset into page `page_nr' of the last valid   * byte in that page, plus one. diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5c787647afe2..58987b5c514b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -452,7 +452,7 @@ void __ext4_error_file(struct file *file, const char *function,  	es = EXT4_SB(inode->i_sb)->s_es;  	es->s_last_error_ino = cpu_to_le32(inode->i_ino);  	if (ext4_error_ratelimit(inode->i_sb)) { -		path = d_path(&(file->f_path), pathname, sizeof(pathname)); +		path = file_path(file, pathname, sizeof(pathname));  		if (IS_ERR(path))  			path = "(unknown)";  		va_start(args, fmt); diff --git a/fs/file.c b/fs/file.c index 93c5f89c248b..6c672ad329e9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)  	spin_unlock(&files->file_lock);  	new_fdt = alloc_fdtable(nr); + +	/* make sure all __fd_install() have seen resize_in_progress +	 * or have finished their rcu_read_lock_sched() section. +	 */ +	if (atomic_read(&files->count) > 1) +		synchronize_sched(); +  	spin_lock(&files->file_lock);  	if (!new_fdt)  		return -ENOMEM; @@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)  		__free_fdtable(new_fdt);  		return -EMFILE;  	} -	/* -	 * Check again since another task may have expanded the fd table while -	 * we dropped the lock -	 */  	cur_fdt = files_fdtable(files); -	if (nr >= cur_fdt->max_fds) { -		/* Continue as planned */ -		copy_fdtable(new_fdt, cur_fdt); -		rcu_assign_pointer(files->fdt, new_fdt); -		if (cur_fdt != &files->fdtab) -			call_rcu(&cur_fdt->rcu, free_fdtable_rcu); -	} else { -		/* Somebody else expanded, so undo our attempt */ -		__free_fdtable(new_fdt); -	} +	BUG_ON(nr < cur_fdt->max_fds); +	copy_fdtable(new_fdt, cur_fdt); +	rcu_assign_pointer(files->fdt, new_fdt); +	if (cur_fdt != &files->fdtab) +		call_rcu(&cur_fdt->rcu, free_fdtable_rcu); +	/* coupled with smp_rmb() in __fd_install() */ +	smp_wmb();  	return 1;  } @@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)   * The files->file_lock should be held on entry, and will be held on exit.   */  static int expand_files(struct files_struct *files, int nr) +	__releases(files->file_lock) +	__acquires(files->file_lock)  {  	struct fdtable *fdt; +	int expanded = 0; +repeat:  	fdt = files_fdtable(files);  	/* Do we need to expand? */  	if (nr < fdt->max_fds) -		return 0; +		return expanded;  	/* Can we expand? 
*/  	if (nr >= sysctl_nr_open)  		return -EMFILE; +	if (unlikely(files->resize_in_progress)) { +		spin_unlock(&files->file_lock); +		expanded = 1; +		wait_event(files->resize_wait, !files->resize_in_progress); +		spin_lock(&files->file_lock); +		goto repeat; +	} +  	/* All good, so we try */ -	return expand_fdtable(files, nr); +	files->resize_in_progress = true; +	expanded = expand_fdtable(files, nr); +	files->resize_in_progress = false; + +	wake_up_all(&files->resize_wait); +	return expanded;  }  static inline void __set_close_on_exec(int fd, struct fdtable *fdt) @@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)  	atomic_set(&newf->count, 1);  	spin_lock_init(&newf->file_lock); +	newf->resize_in_progress = false; +	init_waitqueue_head(&newf->resize_wait);  	newf->next_fd = 0;  	new_fdt = &newf->fdtab;  	new_fdt->max_fds = NR_OPEN_DEFAULT; @@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,  		struct file *file)  {  	struct fdtable *fdt; -	spin_lock(&files->file_lock); -	fdt = files_fdtable(files); + +	might_sleep(); +	rcu_read_lock_sched(); + +	while (unlikely(files->resize_in_progress)) { +		rcu_read_unlock_sched(); +		wait_event(files->resize_wait, !files->resize_in_progress); +		rcu_read_lock_sched(); +	} +	/* coupled with smp_wmb() in expand_fdtable() */ +	smp_rmb(); +	fdt = rcu_dereference_sched(files->fdt);  	BUG_ON(fdt->fd[fd] != NULL);  	rcu_assign_pointer(fdt->fd[fd], file); -	spin_unlock(&files->file_lock); +	rcu_read_unlock_sched();  }  void fd_install(unsigned int fd, struct file *file) @@ -635,11 +664,17 @@ static struct file *__fget(unsigned int fd, fmode_t mask)  	struct file *file;  	rcu_read_lock(); +loop:  	file = fcheck_files(files, fd);  	if (file) { -		/* File object ref couldn't be taken */ -		if ((file->f_mode & mask) || !get_file_rcu(file)) +		/* File object ref couldn't be taken. 
+		 * dup2() atomicity guarantee is the reason +		 * we loop to catch the new file (or NULL pointer) +		 */ +		if (file->f_mode & mask)  			file = NULL; +		else if (!get_file_rcu(file)) +			goto loop;  	}  	rcu_read_unlock(); diff --git a/fs/file_table.c b/fs/file_table.c index 294174dcc226..7f9d407c7595 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,6 @@  #include <linux/cdev.h>  #include <linux/fsnotify.h>  #include <linux/sysctl.h> -#include <linux/lglock.h>  #include <linux/percpu_counter.h>  #include <linux/percpu.h>  #include <linux/hardirq.h> diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 99c7f0a37af4..484b32d3234a 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -61,13 +61,6 @@ const struct file_operations vxfs_dir_operations = {  	.iterate =		vxfs_readdir,  }; -  -static inline u_long -dir_pages(struct inode *inode) -{ -	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} -   static inline u_long  dir_blocks(struct inode *ip)  { diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 89acec742e0b..d403c69bee08 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -327,7 +327,8 @@ static int fscache_alloc_object(struct fscache_cache *cache,  object_already_extant:  	ret = -ENOBUFS; -	if (fscache_object_is_dead(object)) { +	if (fscache_object_is_dying(object) || +	    fscache_cache_is_broken(object)) {  		spin_unlock(&cookie->lock);  		goto error;  	} @@ -671,7 +672,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)  	if (!op)  		return -ENOMEM; -	fscache_operation_init(op, NULL, NULL); +	fscache_operation_init(op, NULL, NULL, NULL);  	op->flags = FSCACHE_OP_MYTHREAD |  		(1 << FSCACHE_OP_WAITING) |  		(1 << FSCACHE_OP_UNUSE_COOKIE); @@ -695,8 +696,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)  	/* the work queue now carries its own ref on the object */  	spin_unlock(&cookie->lock); -	ret = fscache_wait_for_operation_activation(object, op, -						    NULL, NULL, NULL); +	ret = fscache_wait_for_operation_activation(object, op, NULL, NULL);  	if (ret == 0) {  		/* ask the cache to honour the operation */  		ret = object->cache->ops->check_consistency(op); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 7872a62ef30c..97ec45110957 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -124,8 +124,7 @@ extern int fscache_submit_exclusive_op(struct fscache_object *,  				       struct fscache_operation *);  extern int fscache_submit_op(struct fscache_object *,  			     struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *, -			     void (*)(struct fscache_operation *)); +extern int fscache_cancel_op(struct fscache_operation *, bool);  extern void fscache_cancel_all_ops(struct fscache_object *);  extern void fscache_abort_object(struct fscache_object *);  extern void fscache_start_operations(struct fscache_object *); @@ -138,8 +137,7 @@ extern int fscache_wait_for_deferred_lookup(struct fscache_cookie *);  extern int fscache_wait_for_operation_activation(struct fscache_object *,  						 struct fscache_operation *,  						 atomic_t *, -						 atomic_t *, -						 void (*)(struct fscache_operation *)); +						 atomic_t *);  extern void fscache_invalidate_writes(struct fscache_cookie *);  /* @@ -164,6 +162,7 @@ extern atomic_t fscache_n_op_pend;  extern atomic_t fscache_n_op_run;  extern atomic_t fscache_n_op_enqueue;  extern atomic_t fscache_n_op_deferred_release; +extern atomic_t 
fscache_n_op_initialised;  extern atomic_t fscache_n_op_release;  extern atomic_t fscache_n_op_gc;  extern atomic_t fscache_n_op_cancelled; @@ -271,6 +270,11 @@ extern atomic_t fscache_n_cop_write_page;  extern atomic_t fscache_n_cop_uncache_page;  extern atomic_t fscache_n_cop_dissociate_pages; +extern atomic_t fscache_n_cache_no_space_reject; +extern atomic_t fscache_n_cache_stale_objects; +extern atomic_t fscache_n_cache_retired_objects; +extern atomic_t fscache_n_cache_culled_objects; +  static inline void fscache_stat(atomic_t *stat)  {  	atomic_inc(stat); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index da032daf0e0d..9e792e30f4db 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -328,6 +328,17 @@ void fscache_object_init(struct fscache_object *object,  EXPORT_SYMBOL(fscache_object_init);  /* + * Mark the object as no longer being live, making sure that we synchronise + * against op submission. + */ +static inline void fscache_mark_object_dead(struct fscache_object *object) +{ +	spin_lock(&object->lock); +	clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); +	spin_unlock(&object->lock); +} + +/*   * Abort object initialisation before we start it.   */  static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *object, @@ -610,6 +621,8 @@ static const struct fscache_state *fscache_lookup_failure(struct fscache_object  	object->cache->ops->lookup_complete(object);  	fscache_stat_d(&fscache_n_cop_lookup_complete); +	set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags); +  	cookie = object->cookie;  	set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);  	if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) @@ -629,7 +642,7 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob  	_enter("{OBJ%x,%d,%d},%d",  	       object->debug_id, object->n_ops, object->n_children, event); -	clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); +	fscache_mark_object_dead(object);  	object->oob_event_mask = 0;  	if (list_empty(&object->dependents) && @@ -948,7 +961,8 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj  	if (!op)  		goto nomem; -	fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); +	fscache_operation_init(op, object->cache->ops->invalidate_object, +			       NULL, NULL);  	op->flags = FSCACHE_OP_ASYNC |  		(1 << FSCACHE_OP_EXCLUSIVE) |  		(1 << FSCACHE_OP_UNUSE_COOKIE); @@ -974,13 +988,13 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj  	return transit_to(UPDATE_OBJECT);  nomem: -	clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); +	fscache_mark_object_dead(object);  	fscache_unuse_cookie(object);  	_leave(" [ENOMEM]");  	return transit_to(KILL_OBJECT);  submit_op_failed: -	clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); +	fscache_mark_object_dead(object);  	spin_unlock(&cookie->lock);  	fscache_unuse_cookie(object);  	kfree(op); @@ -1016,3 +1030,50 @@ static const struct fscache_state *fscache_update_object(struct fscache_object *  	_leave("");  	return transit_to(WAIT_FOR_CMD);  } + +/** + * fscache_object_retrying_stale - Note retrying stale object + * @object: The object that will be retried + * + * Note that an object lookup found an on-disk object that was adjudged to be + * stale and has been deleted.  The lookup will be retried. 
+ */ +void fscache_object_retrying_stale(struct fscache_object *object) +{ +	fscache_stat(&fscache_n_cache_no_space_reject); +} +EXPORT_SYMBOL(fscache_object_retrying_stale); + +/** + * fscache_object_mark_killed - Note that an object was killed + * @object: The object that was culled + * @why: The reason the object was killed. + * + * Note that an object was killed.  Returns true if the object was + * already marked killed, false if it wasn't. + */ +void fscache_object_mark_killed(struct fscache_object *object, +				enum fscache_why_object_killed why) +{ +	if (test_and_set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags)) { +		pr_err("Error: Object already killed by cache [%s]\n", +		       object->cache->identifier); +		return; +	} + +	switch (why) { +	case FSCACHE_OBJECT_NO_SPACE: +		fscache_stat(&fscache_n_cache_no_space_reject); +		break; +	case FSCACHE_OBJECT_IS_STALE: +		fscache_stat(&fscache_n_cache_stale_objects); +		break; +	case FSCACHE_OBJECT_WAS_RETIRED: +		fscache_stat(&fscache_n_cache_retired_objects); +		break; +	case FSCACHE_OBJECT_WAS_CULLED: +		fscache_stat(&fscache_n_cache_culled_objects); +		break; +	} +} +EXPORT_SYMBOL(fscache_object_mark_killed); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e7b87a0e5185..de67745e1cd7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -20,6 +20,35 @@  atomic_t fscache_op_debug_id;  EXPORT_SYMBOL(fscache_op_debug_id); +static void fscache_operation_dummy_cancel(struct fscache_operation *op) +{ +} + +/** + * fscache_operation_init - Do basic initialisation of an operation + * @op: The operation to initialise + * @release: The release function to assign + * + * Do basic initialisation of an operation.  The caller must still set flags, + * object and processor if needed. 
+ */ +void fscache_operation_init(struct fscache_operation *op, +			    fscache_operation_processor_t processor, +			    fscache_operation_cancel_t cancel, +			    fscache_operation_release_t release) +{ +	INIT_WORK(&op->work, fscache_op_work_func); +	atomic_set(&op->usage, 1); +	op->state = FSCACHE_OP_ST_INITIALISED; +	op->debug_id = atomic_inc_return(&fscache_op_debug_id); +	op->processor = processor; +	op->cancel = cancel ?: fscache_operation_dummy_cancel; +	op->release = release; +	INIT_LIST_HEAD(&op->pend_link); +	fscache_stat(&fscache_n_op_initialised); +} +EXPORT_SYMBOL(fscache_operation_init); +  /**   * fscache_enqueue_operation - Enqueue an operation for processing   * @op: The operation to enqueue @@ -76,6 +105,43 @@ static void fscache_run_op(struct fscache_object *object,  }  /* + * report an unexpected submission + */ +static void fscache_report_unexpected_submission(struct fscache_object *object, +						 struct fscache_operation *op, +						 const struct fscache_state *ostate) +{ +	static bool once_only; +	struct fscache_operation *p; +	unsigned n; + +	if (once_only) +		return; +	once_only = true; + +	kdebug("unexpected submission OP%x [OBJ%x %s]", +	       op->debug_id, object->debug_id, object->state->name); +	kdebug("objstate=%s [%s]", object->state->name, ostate->name); +	kdebug("objflags=%lx", object->flags); +	kdebug("objevent=%lx [%lx]", object->events, object->event_mask); +	kdebug("ops=%u inp=%u exc=%u", +	       object->n_ops, object->n_in_progress, object->n_exclusive); + +	if (!list_empty(&object->pending_ops)) { +		n = 0; +		list_for_each_entry(p, &object->pending_ops, pend_link) { +			ASSERTCMP(p->object, ==, object); +			kdebug("%p %p", op->processor, op->release); +			n++; +		} + +		kdebug("n=%u", n); +	} + +	dump_stack(); +} + +/*   * submit an exclusive operation for an object   * - other ops are excluded from running simultaneously with this one   * - this gets any extra refs it needs on an op @@ -83,6 +149,8 @@ static void fscache_run_op(struct fscache_object *object,  int fscache_submit_exclusive_op(struct fscache_object *object,  				struct fscache_operation *op)  { +	const struct fscache_state *ostate; +	unsigned long flags;  	int ret;  	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); @@ -95,8 +163,21 @@ int fscache_submit_exclusive_op(struct fscache_object *object,  	ASSERTCMP(object->n_ops, >=, object->n_exclusive);  	ASSERT(list_empty(&op->pend_link)); +	ostate = object->state; +	smp_rmb(); +  	op->state = FSCACHE_OP_ST_PENDING; -	if (fscache_object_is_active(object)) { +	flags = READ_ONCE(object->flags); +	if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { +		fscache_stat(&fscache_n_op_rejected); +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -ENOBUFS; +	} else if (unlikely(fscache_cache_is_broken(object))) { +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -EIO; +	} else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) {  		op->object = object;  		object->n_ops++;  		object->n_exclusive++;	/* reads and writes must wait */ @@ -118,7 +199,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object,  		/* need to issue a new write op after this */  		clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);  		ret = 0; -	} else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { +	} else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) {  		op->object = object;  		object->n_ops++;  		object->n_exclusive++;	/* reads and writes must wait */ @@ -126,12 +207,15 @@ int 
fscache_submit_exclusive_op(struct fscache_object *object,  		list_add_tail(&op->pend_link, &object->pending_ops);  		fscache_stat(&fscache_n_op_pend);  		ret = 0; +	} else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -ENOBUFS;  	} else { -		/* If we're in any other state, there must have been an I/O -		 * error of some nature. -		 */ -		ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); -		ret = -EIO; +		fscache_report_unexpected_submission(object, op, ostate); +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -ENOBUFS;  	}  	spin_unlock(&object->lock); @@ -139,43 +223,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,  }  /* - * report an unexpected submission - */ -static void fscache_report_unexpected_submission(struct fscache_object *object, -						 struct fscache_operation *op, -						 const struct fscache_state *ostate) -{ -	static bool once_only; -	struct fscache_operation *p; -	unsigned n; - -	if (once_only) -		return; -	once_only = true; - -	kdebug("unexpected submission OP%x [OBJ%x %s]", -	       op->debug_id, object->debug_id, object->state->name); -	kdebug("objstate=%s [%s]", object->state->name, ostate->name); -	kdebug("objflags=%lx", object->flags); -	kdebug("objevent=%lx [%lx]", object->events, object->event_mask); -	kdebug("ops=%u inp=%u exc=%u", -	       object->n_ops, object->n_in_progress, object->n_exclusive); - -	if (!list_empty(&object->pending_ops)) { -		n = 0; -		list_for_each_entry(p, &object->pending_ops, pend_link) { -			ASSERTCMP(p->object, ==, object); -			kdebug("%p %p", op->processor, op->release); -			n++; -		} - -		kdebug("n=%u", n); -	} - -	dump_stack(); -} - -/*   * submit an operation for an object   * - objects may be submitted only in the following states:   *   - during object creation (write ops may be submitted) @@ -187,6 +234,7 @@ int fscache_submit_op(struct fscache_object *object,  		      struct fscache_operation *op)  {  	const struct fscache_state *ostate; +	unsigned long flags;  	int ret;  	_enter("{OBJ%x OP%x},{%u}", @@ -204,7 +252,17 @@ int fscache_submit_op(struct fscache_object *object,  	smp_rmb();  	op->state = FSCACHE_OP_ST_PENDING; -	if (fscache_object_is_active(object)) { +	flags = READ_ONCE(object->flags); +	if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { +		fscache_stat(&fscache_n_op_rejected); +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -ENOBUFS; +	} else if (unlikely(fscache_cache_is_broken(object))) { +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		ret = -EIO; +	} else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) {  		op->object = object;  		object->n_ops++; @@ -222,23 +280,21 @@ int fscache_submit_op(struct fscache_object *object,  			fscache_run_op(object, op);  		}  		ret = 0; -	} else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { +	} else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) {  		op->object = object;  		object->n_ops++;  		atomic_inc(&op->usage);  		list_add_tail(&op->pend_link, &object->pending_ops);  		fscache_stat(&fscache_n_op_pend);  		ret = 0; -	} else if (fscache_object_is_dying(object)) { -		fscache_stat(&fscache_n_op_rejected); +	} else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { +		op->cancel(op);  		op->state = FSCACHE_OP_ST_CANCELLED;  		ret = -ENOBUFS; -	} else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { +	} else {  		fscache_report_unexpected_submission(object, op, ostate);  		
ASSERT(!fscache_object_is_active(object)); -		op->state = FSCACHE_OP_ST_CANCELLED; -		ret = -ENOBUFS; -	} else { +		op->cancel(op);  		op->state = FSCACHE_OP_ST_CANCELLED;  		ret = -ENOBUFS;  	} @@ -293,9 +349,10 @@ void fscache_start_operations(struct fscache_object *object)   * cancel an operation that's pending on an object   */  int fscache_cancel_op(struct fscache_operation *op, -		      void (*do_cancel)(struct fscache_operation *)) +		      bool cancel_in_progress_op)  {  	struct fscache_object *object = op->object; +	bool put = false;  	int ret;  	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); @@ -309,19 +366,37 @@ int fscache_cancel_op(struct fscache_operation *op,  	ret = -EBUSY;  	if (op->state == FSCACHE_OP_ST_PENDING) {  		ASSERT(!list_empty(&op->pend_link)); -		fscache_stat(&fscache_n_op_cancelled);  		list_del_init(&op->pend_link); -		if (do_cancel) -			do_cancel(op); +		put = true; + +		fscache_stat(&fscache_n_op_cancelled); +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) +			object->n_exclusive--; +		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) +			wake_up_bit(&op->flags, FSCACHE_OP_WAITING); +		ret = 0; +	} else if (op->state == FSCACHE_OP_ST_IN_PROGRESS && cancel_in_progress_op) { +		ASSERTCMP(object->n_in_progress, >, 0); +		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) +			object->n_exclusive--; +		object->n_in_progress--; +		if (object->n_in_progress == 0) +			fscache_start_operations(object); + +		fscache_stat(&fscache_n_op_cancelled); +		op->cancel(op);  		op->state = FSCACHE_OP_ST_CANCELLED;  		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))  			object->n_exclusive--;  		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))  			wake_up_bit(&op->flags, FSCACHE_OP_WAITING); -		fscache_put_operation(op);  		ret = 0;  	} +	if (put) +		fscache_put_operation(op);  	spin_unlock(&object->lock);  	_leave(" = %d", ret);  	return ret; @@ -345,6 +420,7 @@ void fscache_cancel_all_ops(struct fscache_object *object)  		list_del_init(&op->pend_link);  		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); +		op->cancel(op);  		op->state = FSCACHE_OP_ST_CANCELLED;  		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) @@ -377,8 +453,12 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled)  	spin_lock(&object->lock); -	op->state = cancelled ? -		FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; +	if (!cancelled) { +		op->state = FSCACHE_OP_ST_COMPLETE; +	} else { +		op->cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED; +	}  	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))  		object->n_exclusive--; @@ -409,9 +489,9 @@ void fscache_put_operation(struct fscache_operation *op)  		return;  	_debug("PUT OP"); -	ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, +	ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && +		    op->state != FSCACHE_OP_ST_COMPLETE,  		    op->state, ==, FSCACHE_OP_ST_CANCELLED); -	op->state = FSCACHE_OP_ST_DEAD;  	fscache_stat(&fscache_n_op_release); @@ -419,37 +499,39 @@ void fscache_put_operation(struct fscache_operation *op)  		op->release(op);  		op->release = NULL;  	} +	op->state = FSCACHE_OP_ST_DEAD;  	object = op->object; +	if (likely(object)) { +		if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) +			atomic_dec(&object->n_reads); +		if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) +			fscache_unuse_cookie(object); + +		/* now... 
we may get called with the object spinlock held, so we +		 * complete the cleanup here only if we can immediately acquire the +		 * lock, and defer it otherwise */ +		if (!spin_trylock(&object->lock)) { +			_debug("defer put"); +			fscache_stat(&fscache_n_op_deferred_release); + +			cache = object->cache; +			spin_lock(&cache->op_gc_list_lock); +			list_add_tail(&op->pend_link, &cache->op_gc_list); +			spin_unlock(&cache->op_gc_list_lock); +			schedule_work(&cache->op_gc); +			_leave(" [defer]"); +			return; +		} -	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) -		atomic_dec(&object->n_reads); -	if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) -		fscache_unuse_cookie(object); - -	/* now... we may get called with the object spinlock held, so we -	 * complete the cleanup here only if we can immediately acquire the -	 * lock, and defer it otherwise */ -	if (!spin_trylock(&object->lock)) { -		_debug("defer put"); -		fscache_stat(&fscache_n_op_deferred_release); +		ASSERTCMP(object->n_ops, >, 0); +		object->n_ops--; +		if (object->n_ops == 0) +			fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); -		cache = object->cache; -		spin_lock(&cache->op_gc_list_lock); -		list_add_tail(&op->pend_link, &cache->op_gc_list); -		spin_unlock(&cache->op_gc_list_lock); -		schedule_work(&cache->op_gc); -		_leave(" [defer]"); -		return; +		spin_unlock(&object->lock);  	} -	ASSERTCMP(object->n_ops, >, 0); -	object->n_ops--; -	if (object->n_ops == 0) -		fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); - -	spin_unlock(&object->lock); -  	kfree(op);  	_leave(" [done]");  } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index de33b3fccca6..483bbc613bf0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -213,7 +213,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)  		return -ENOMEM;  	} -	fscache_operation_init(op, fscache_attr_changed_op, NULL); +	fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL);  	op->flags = FSCACHE_OP_ASYNC |  		(1 << FSCACHE_OP_EXCLUSIVE) |  		(1 << FSCACHE_OP_UNUSE_COOKIE); @@ -239,7 +239,7 @@ nobufs_dec:  	wake_cookie = __fscache_unuse_cookie(cookie);  nobufs:  	spin_unlock(&cookie->lock); -	kfree(op); +	fscache_put_operation(op);  	if (wake_cookie)  		__fscache_wake_unused_cookie(cookie);  	fscache_stat(&fscache_n_attr_changed_nobufs); @@ -249,6 +249,17 @@ nobufs:  EXPORT_SYMBOL(__fscache_attr_changed);  /* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ +	struct fscache_retrieval *op = +		container_of(_op, struct fscache_retrieval, op); + +	atomic_set(&op->n_pages, 0); +} + +/*   * release a retrieval op reference   */  static void fscache_release_retrieval_op(struct fscache_operation *_op) @@ -258,11 +269,12 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)  	_enter("{OP%x}", op->op.debug_id); -	ASSERTCMP(atomic_read(&op->n_pages), ==, 0); +	ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED, +		    atomic_read(&op->n_pages), ==, 0);  	fscache_hist(fscache_retrieval_histogram, op->start_time);  	if (op->context) -		fscache_put_context(op->op.object->cookie, op->context); +		fscache_put_context(op->cookie, op->context);  	_leave("");  } @@ -285,15 +297,24 @@ static struct fscache_retrieval *fscache_alloc_retrieval(  		return NULL;  	} -	fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); +	fscache_operation_init(&op->op, NULL, +			       fscache_do_cancel_retrieval, +			       fscache_release_retrieval_op);  	
op->op.flags	= FSCACHE_OP_MYTHREAD |  		(1UL << FSCACHE_OP_WAITING) |  		(1UL << FSCACHE_OP_UNUSE_COOKIE); +	op->cookie	= cookie;  	op->mapping	= mapping;  	op->end_io_func	= end_io_func;  	op->context	= context;  	op->start_time	= jiffies;  	INIT_LIST_HEAD(&op->to_do); + +	/* Pin the netfs read context in case we need to do the actual netfs +	 * read because we've encountered a cache read failure. +	 */ +	if (context) +		fscache_get_context(op->cookie, context);  	return op;  } @@ -330,24 +351,12 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)  }  /* - * Handle cancellation of a pending retrieval op - */ -static void fscache_do_cancel_retrieval(struct fscache_operation *_op) -{ -	struct fscache_retrieval *op = -		container_of(_op, struct fscache_retrieval, op); - -	atomic_set(&op->n_pages, 0); -} - -/*   * wait for an object to become active (or dead)   */  int fscache_wait_for_operation_activation(struct fscache_object *object,  					  struct fscache_operation *op,  					  atomic_t *stat_op_waits, -					  atomic_t *stat_object_dead, -					  void (*do_cancel)(struct fscache_operation *)) +					  atomic_t *stat_object_dead)  {  	int ret; @@ -359,7 +368,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,  		fscache_stat(stat_op_waits);  	if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,  			TASK_INTERRUPTIBLE) != 0) { -		ret = fscache_cancel_op(op, do_cancel); +		ret = fscache_cancel_op(op, false);  		if (ret == 0)  			return -ERESTARTSYS; @@ -377,11 +386,13 @@ check_if_dead:  		_leave(" = -ENOBUFS [cancelled]");  		return -ENOBUFS;  	} -	if (unlikely(fscache_object_is_dead(object))) { -		pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->state); -		fscache_cancel_op(op, do_cancel); +	if (unlikely(fscache_object_is_dying(object) || +		     fscache_cache_is_broken(object))) { +		enum fscache_operation_state state = op->state; +		fscache_cancel_op(op, true);  		if (stat_object_dead)  			fscache_stat(stat_object_dead); +		_leave(" = -ENOBUFS [obj dead %d]", state);  		return -ENOBUFS;  	}  	return 0; @@ -453,17 +464,12 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,  	fscache_stat(&fscache_n_retrieval_ops); -	/* pin the netfs read context in case we need to do the actual netfs -	 * read because we've encountered a cache read failure */ -	fscache_get_context(object->cookie, op->context); -  	/* we wait for the operation to become active, and then process it  	 * *here*, in this thread, and not in the thread pool */  	ret = fscache_wait_for_operation_activation(  		object, &op->op,  		__fscache_stat(&fscache_n_retrieval_op_waits), -		__fscache_stat(&fscache_n_retrievals_object_dead), -		fscache_do_cancel_retrieval); +		__fscache_stat(&fscache_n_retrievals_object_dead));  	if (ret < 0)  		goto error; @@ -503,7 +509,7 @@ nobufs_unlock:  	spin_unlock(&cookie->lock);  	if (wake_cookie)  		__fscache_wake_unused_cookie(cookie); -	kfree(op); +	fscache_put_retrieval(op);  nobufs:  	fscache_stat(&fscache_n_retrievals_nobufs);  	_leave(" = -ENOBUFS"); @@ -584,17 +590,12 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,  	fscache_stat(&fscache_n_retrieval_ops); -	/* pin the netfs read context in case we need to do the actual netfs -	 * read because we've encountered a cache read failure */ -	fscache_get_context(object->cookie, op->context); -  	/* we wait for the operation to become active, and then process it  	 * *here*, in this thread, and not in the thread pool */  	ret = fscache_wait_for_operation_activation(  		
object, &op->op,  		__fscache_stat(&fscache_n_retrieval_op_waits), -		__fscache_stat(&fscache_n_retrievals_object_dead), -		fscache_do_cancel_retrieval); +		__fscache_stat(&fscache_n_retrievals_object_dead));  	if (ret < 0)  		goto error; @@ -632,7 +633,7 @@ nobufs_unlock_dec:  	wake_cookie = __fscache_unuse_cookie(cookie);  nobufs_unlock:  	spin_unlock(&cookie->lock); -	kfree(op); +	fscache_put_retrieval(op);  	if (wake_cookie)  		__fscache_wake_unused_cookie(cookie);  nobufs: @@ -700,8 +701,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,  	ret = fscache_wait_for_operation_activation(  		object, &op->op,  		__fscache_stat(&fscache_n_alloc_op_waits), -		__fscache_stat(&fscache_n_allocs_object_dead), -		fscache_do_cancel_retrieval); +		__fscache_stat(&fscache_n_allocs_object_dead));  	if (ret < 0)  		goto error; @@ -726,7 +726,7 @@ nobufs_unlock_dec:  	wake_cookie = __fscache_unuse_cookie(cookie);  nobufs_unlock:  	spin_unlock(&cookie->lock); -	kfree(op); +	fscache_put_retrieval(op);  	if (wake_cookie)  		__fscache_wake_unused_cookie(cookie);  nobufs: @@ -944,7 +944,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,  	if (!op)  		goto nomem; -	fscache_operation_init(&op->op, fscache_write_op, +	fscache_operation_init(&op->op, fscache_write_op, NULL,  			       fscache_release_write_op);  	op->op.flags = FSCACHE_OP_ASYNC |  		(1 << FSCACHE_OP_WAITING) | @@ -1016,7 +1016,7 @@ already_pending:  	spin_unlock(&object->lock);  	spin_unlock(&cookie->lock);  	radix_tree_preload_end(); -	kfree(op); +	fscache_put_operation(&op->op);  	fscache_stat(&fscache_n_stores_ok);  	_leave(" = 0");  	return 0; @@ -1036,7 +1036,7 @@ nobufs_unlock_obj:  nobufs:  	spin_unlock(&cookie->lock);  	radix_tree_preload_end(); -	kfree(op); +	fscache_put_operation(&op->op);  	if (wake_cookie)  		__fscache_wake_unused_cookie(cookie);  	fscache_stat(&fscache_n_stores_nobufs); @@ -1044,7 +1044,7 @@ nobufs:  	return -ENOBUFS;  nomem_free: -	kfree(op); +	fscache_put_operation(&op->op);  nomem:  	fscache_stat(&fscache_n_stores_oom);  	_leave(" = -ENOMEM"); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 40d13c70ef51..7cfa0aacdf6d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -23,6 +23,7 @@ atomic_t fscache_n_op_run;  atomic_t fscache_n_op_enqueue;  atomic_t fscache_n_op_requeue;  atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_initialised;  atomic_t fscache_n_op_release;  atomic_t fscache_n_op_gc;  atomic_t fscache_n_op_cancelled; @@ -130,6 +131,11 @@ atomic_t fscache_n_cop_write_page;  atomic_t fscache_n_cop_uncache_page;  atomic_t fscache_n_cop_dissociate_pages; +atomic_t fscache_n_cache_no_space_reject; +atomic_t fscache_n_cache_stale_objects; +atomic_t fscache_n_cache_retired_objects; +atomic_t fscache_n_cache_culled_objects; +  /*   * display the general statistics   */ @@ -246,7 +252,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)  		   atomic_read(&fscache_n_op_enqueue),  		   atomic_read(&fscache_n_op_cancelled),  		   atomic_read(&fscache_n_op_rejected)); -	seq_printf(m, "Ops    : dfr=%u rel=%u gc=%u\n", +	seq_printf(m, "Ops    : ini=%u dfr=%u rel=%u gc=%u\n", +		   atomic_read(&fscache_n_op_initialised),  		   atomic_read(&fscache_n_op_deferred_release),  		   atomic_read(&fscache_n_op_release),  		   atomic_read(&fscache_n_op_gc)); @@ -271,6 +278,11 @@ static int fscache_stats_show(struct seq_file *m, void *v)  		   atomic_read(&fscache_n_cop_write_page),  		   atomic_read(&fscache_n_cop_uncache_page),  		   
atomic_read(&fscache_n_cop_dissociate_pages)); +	seq_printf(m, "CacheEv: nsp=%d stl=%d rtr=%d cul=%d\n", +		   atomic_read(&fscache_n_cache_no_space_reject), +		   atomic_read(&fscache_n_cache_stale_objects), +		   atomic_read(&fscache_n_cache_retired_objects), +		   atomic_read(&fscache_n_cache_culled_objects));  	return 0;  } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 014fa8ba2b51..f523f2f04c19 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1169,7 +1169,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  	if (err <= 0)  		goto out; -	err = file_remove_suid(file); +	err = file_remove_privs(file);  	if (err)  		goto out; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 95d255219b1e..1f1c7dcbcc2f 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -252,7 +252,7 @@ extern void hfs_mark_mdb_dirty(struct super_block *sb);  #define __hfs_u_to_mtime(sec)	cpu_to_be32(sec + 2082844800U - sys_tz.tz_minuteswest * 60)  #define __hfs_m_to_utime(sec)	(be32_to_cpu(sec) - 2082844800U  + sys_tz.tz_minuteswest * 60) -#define HFS_I(inode)	(list_entry(inode, struct hfs_inode_info, vfs_inode)) +#define HFS_I(inode)	(container_of(inode, struct hfs_inode_info, vfs_inode))  #define HFS_SB(sb)	((struct hfs_sb_info *)(sb)->s_fs_info)  #define hfs_m_to_utime(time)	(struct timespec){ .tv_sec = __hfs_m_to_utime(time) } diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index b0441d65fa54..f91a1faf819e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -263,7 +263,7 @@ struct hfsplus_inode_info {  static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)  { -	return list_entry(inode, struct hfsplus_inode_info, vfs_inode); +	return container_of(inode, struct hfsplus_inode_info, vfs_inode);  }  /* diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b63b75fa00e7..bb04b58d1d69 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -304,7 +304,7 @@ extern const struct address_space_operations hpfs_symlink_aops;  static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)  { -	return list_entry(inode, struct hpfs_inode_info, vfs_inode); +	return container_of(inode, struct hpfs_inode_info, vfs_inode);  }  static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) diff --git a/fs/inode.c b/fs/inode.c index 069721f0cc0e..d30640f7a193 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -841,7 +841,11 @@ unsigned int get_next_ino(void)  	}  #endif -	*p = ++res; +	res++; +	/* get_next_ino should not provide a 0 inode number */ +	if (unlikely(!res)) +		res++; +	*p = res;  	put_cpu_var(last_ino);  	return res;  } @@ -1674,7 +1678,31 @@ int should_remove_suid(struct dentry *dentry)  }  EXPORT_SYMBOL(should_remove_suid); -static int __remove_suid(struct dentry *dentry, int kill) +/* + * Return mask of changes for notify_change() that need to be done as a + * response to write or truncate. Return 0 if nothing has to be changed. + * Negative value on error (change should be denied). 
+ */ +int dentry_needs_remove_privs(struct dentry *dentry) +{ +	struct inode *inode = d_inode(dentry); +	int mask = 0; +	int ret; + +	if (IS_NOSEC(inode)) +		return 0; + +	mask = should_remove_suid(dentry); +	ret = security_inode_need_killpriv(dentry); +	if (ret < 0) +		return ret; +	if (ret) +		mask |= ATTR_KILL_PRIV; +	return mask; +} +EXPORT_SYMBOL(dentry_needs_remove_privs); + +static int __remove_privs(struct dentry *dentry, int kill)  {  	struct iattr newattrs; @@ -1686,33 +1714,32 @@ static int __remove_suid(struct dentry *dentry, int kill)  	return notify_change(dentry, &newattrs, NULL);  } -int file_remove_suid(struct file *file) +/* + * Remove special file priviledges (suid, capabilities) when file is written + * to or truncated. + */ +int file_remove_privs(struct file *file)  {  	struct dentry *dentry = file->f_path.dentry;  	struct inode *inode = d_inode(dentry); -	int killsuid; -	int killpriv; +	int kill;  	int error = 0;  	/* Fast path for nothing security related */  	if (IS_NOSEC(inode))  		return 0; -	killsuid = should_remove_suid(dentry); -	killpriv = security_inode_need_killpriv(dentry); - -	if (killpriv < 0) -		return killpriv; -	if (killpriv) -		error = security_inode_killpriv(dentry); -	if (!error && killsuid) -		error = __remove_suid(dentry, killsuid); -	if (!error && (inode->i_sb->s_flags & MS_NOSEC)) -		inode->i_flags |= S_NOSEC; +	kill = file_needs_remove_privs(file); +	if (kill < 0) +		return kill; +	if (kill) +		error = __remove_privs(dentry, kill); +	if (!error) +		inode_has_no_xattr(inode);  	return error;  } -EXPORT_SYMBOL(file_remove_suid); +EXPORT_SYMBOL(file_remove_privs);  /**   *	file_update_time	-	update mtime and ctime time @@ -1967,9 +1994,8 @@ EXPORT_SYMBOL(inode_dio_wait);   * inode is being instantiated).  The reason for the cmpxchg() loop   * --- which wouldn't be necessary if all code paths which modify   * i_flags actually followed this rule, is that there is at least one - * code path which doesn't today --- for example, - * __generic_file_aio_write() calls file_remove_suid() without holding - * i_mutex --- so we use cmpxchg() out of an abundance of caution. + * code path which doesn't today so we use cmpxchg() out of an abundance + * of caution.   
*   * In the long run, i_mutex is overkill, and we should probably look   * at using the i_lock spinlock to protect i_flags, and then make sure diff --git a/fs/internal.h b/fs/internal.h index 01dce1d1476b..4d5af583ab03 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,  extern long do_handle_open(int mountdirfd,  			   struct file_handle __user *ufh, int open_flag);  extern int open_check_o_direct(struct file *f); +extern int vfs_open(const struct path *, struct file *, const struct cred *);  /*   * inode.c diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index d200a9b8fd5e..824e61ede465 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -19,7 +19,7 @@  struct kstatfs;  struct kvec; -#define JFFS2_INODE_INFO(i) (list_entry(i, struct jffs2_inode_info, vfs_inode)) +#define JFFS2_INODE_INFO(i) (container_of(i, struct jffs2_inode_info, vfs_inode))  #define OFNI_EDONI_2SFFJ(f)  (&(f)->vfs_inode)  #define JFFS2_SB_INFO(sb) (sb->s_fs_info)  #define OFNI_BS_2SFFJ(c)  ((struct super_block *)c->os_priv) diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index fa7e795bd8ae..1f26d1910409 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -206,7 +206,7 @@ struct jfs_sb_info {  static inline struct jfs_inode_info *JFS_IP(struct inode *inode)  { -	return list_entry(inode, struct jfs_inode_info, vfs_inode); +	return container_of(inode, struct jfs_inode_info, vfs_inode);  }  static inline int jfs_dirtable_inline(struct inode *inode) diff --git a/fs/libfs.c b/fs/libfs.c index 88a4cb418756..102edfd39000 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,11 +20,6 @@  #include "internal.h" -static inline int simple_positive(struct dentry *dentry) -{ -	return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,  		   struct kstat *stat)  { diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 118e4e7bc935..d19ac258105a 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -45,11 +45,6 @@ minix_last_byte(struct inode *inode, unsigned long page_nr)  	return last_byte;  } -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)  {  	struct address_space *mapping = page->mapping; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 1ebd11854622..01ad81dcacc5 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -84,7 +84,7 @@ static inline struct minix_sb_info *minix_sb(struct super_block *sb)  static inline struct minix_inode_info *minix_i(struct inode *inode)  { -	return list_entry(inode, struct minix_inode_info, vfs_inode); +	return container_of(inode, struct minix_inode_info, vfs_inode);  }  static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize) diff --git a/fs/namei.c b/fs/namei.c index 2dad0eaf91d3..ae4e4c18b2ac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -792,7 +792,7 @@ static void set_root(struct nameidata *nd)  	get_fs_root(current->fs, &nd->root);  } -static unsigned set_root_rcu(struct nameidata *nd) +static void set_root_rcu(struct nameidata *nd)  {  	struct fs_struct *fs = current->fs;  	unsigned seq; @@ -802,7 +802,6 @@ static unsigned set_root_rcu(struct nameidata *nd)  		nd->root = fs->root;  		nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);  	} while (read_seqcount_retry(&fs->seq, seq)); -	return nd->root_seq;  }  static void 
path_put_conditional(struct path *path, struct nameidata *nd) @@ -1998,7 +1997,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)  	if (*s == '/') {  		if (flags & LOOKUP_RCU) {  			rcu_read_lock(); -			nd->seq = set_root_rcu(nd); +			set_root_rcu(nd); +			nd->seq = nd->root_seq;  		} else {  			set_root(nd);  			path_get(&nd->root); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 80021c709af9..93575e91a7aa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1145,6 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,  		case 0x00:  			ncp_dbg(1, "renamed %pd -> %pd\n",  				old_dentry, new_dentry); +			ncp_d_prune(old_dentry); +			ncp_d_prune(new_dentry);  			break;  		case 0x9E:  			error = -ENAMETOOLONG; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 21457bb0edd6..547308a5ec6f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1768,7 +1768,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir);  static void nfs_dentry_handle_enoent(struct dentry *dentry)  { -	if (d_really_is_positive(dentry) && !d_unhashed(dentry)) +	if (simple_positive(dentry))  		d_delete(dentry);  } diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 0ee0bed3649b..6b8b92b19cec 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -61,11 +61,6 @@ static inline void nilfs_put_page(struct page *page)  	page_cache_release(page);  } -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  /*   * Return the offset into page `page_nr' of the last valid   * byte in that page, plus one. diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 258d9fe2521a..4a73d6dffabf 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -307,31 +307,13 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,  static ssize_t  nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)  { -	struct file *file = iocb->ki_filp; -	struct address_space *mapping = file->f_mapping; -	struct inode *inode = file->f_mapping->host; -	size_t count = iov_iter_count(iter); -	ssize_t size; +	struct inode *inode = file_inode(iocb->ki_filp);  	if (iov_iter_rw(iter) == WRITE)  		return 0;  	/* Needs synchronization with the cleaner */ -	size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); - -	/* -	 * In case of error extending write may have instantiated a few -	 * blocks outside i_size. Trim these off again. 
-	 */ -	if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { -		loff_t isize = i_size_read(inode); -		loff_t end = offset + count; - -		if (end > isize) -			nilfs_write_failed(mapping, end); -	} - -	return size; +	return blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block);  }  const struct address_space_operations nilfs_aops = { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 2cd653670764..262561fea923 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -382,7 +382,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,  	base_ni = ni;  	if (NInoAttr(ni))  		base_ni = ni->ext.base_ntfs_ino; -	err = file_remove_suid(file); +	err = file_remove_privs(file);  	if (unlikely(err))  		goto out;  	/* diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 76b6cfb579d7..b3c3469de6cb 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -239,7 +239,7 @@ typedef struct {   */  static inline ntfs_inode *NTFS_I(struct inode *inode)  { -	return (ntfs_inode *)list_entry(inode, big_ntfs_inode, vfs_inode); +	return (ntfs_inode *)container_of(inode, big_ntfs_inode, vfs_inode);  }  static inline struct inode *VFS_I(ntfs_inode *ni) diff --git a/fs/open.c b/fs/open.c index e0250bdcc440..e33dab287fa0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -51,8 +51,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,  		newattrs.ia_valid |= ATTR_FILE;  	} -	/* Remove suid/sgid on truncate too */ -	ret = should_remove_suid(dentry); +	/* Remove suid, sgid, and file capabilities on truncate too */ +	ret = dentry_needs_remove_privs(dentry); +	if (ret < 0) +		return ret;  	if (ret)  		newattrs.ia_valid |= ret | ATTR_FORCE; @@ -678,18 +680,18 @@ int open_check_o_direct(struct file *f)  }  static int do_dentry_open(struct file *f, +			  struct inode *inode,  			  int (*open)(struct inode *, struct file *),  			  const struct cred *cred)  {  	static const struct file_operations empty_fops = {}; -	struct inode *inode;  	int error;  	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |  				FMODE_PREAD | FMODE_PWRITE;  	path_get(&f->f_path); -	inode = f->f_inode = f->f_path.dentry->d_inode; +	f->f_inode = inode;  	f->f_mapping = inode->i_mapping;  	if (unlikely(f->f_flags & O_PATH)) { @@ -793,7 +795,8 @@ int finish_open(struct file *file, struct dentry *dentry,  	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */  	file->f_path.dentry = dentry; -	error = do_dentry_open(file, open, current_cred()); +	error = do_dentry_open(file, d_backing_inode(dentry), open, +			       current_cred());  	if (!error)  		*opened |= FILE_OPENED; @@ -822,6 +825,34 @@ int finish_no_open(struct file *file, struct dentry *dentry)  }  EXPORT_SYMBOL(finish_no_open); +char *file_path(struct file *filp, char *buf, int buflen) +{ +	return d_path(&filp->f_path, buf, buflen); +} +EXPORT_SYMBOL(file_path); + +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *file, +	     const struct cred *cred) +{ +	struct dentry *dentry = path->dentry; +	struct inode *inode = dentry->d_inode; + +	file->f_path = *path; +	if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { +		inode = dentry->d_op->d_select_inode(dentry, file->f_flags); +		if (IS_ERR(inode)) +			return PTR_ERR(inode); +	} + +	return do_dentry_open(file, inode, NULL, cred); +} +  struct file *dentry_open(const struct path *path, int flags,  			 const struct cred *cred)  { @@ -853,26 
+884,6 @@ struct file *dentry_open(const struct path *path, int flags,  }  EXPORT_SYMBOL(dentry_open); -/** - * vfs_open - open the file at the given path - * @path: path to open - * @filp: newly allocated file with f_flag initialized - * @cred: credentials to use - */ -int vfs_open(const struct path *path, struct file *filp, -	     const struct cred *cred) -{ -	struct inode *inode = path->dentry->d_inode; - -	if (inode->i_op->dentry_open) -		return inode->i_op->dentry_open(path->dentry, filp, cred); -	else { -		filp->f_path = *path; -		return do_dentry_open(filp, NULL, cred); -	} -} -EXPORT_SYMBOL(vfs_open); -  static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)  {  	int lookup_flags = 0; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 308379b2d0b2..f140e3dbfb7b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -337,37 +337,30 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,  	return true;  } -static int ovl_dentry_open(struct dentry *dentry, struct file *file, -		    const struct cred *cred) +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)  {  	int err;  	struct path realpath;  	enum ovl_path_type type; -	bool want_write = false;  	type = ovl_path_real(dentry, &realpath); -	if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { -		want_write = true; +	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {  		err = ovl_want_write(dentry);  		if (err) -			goto out; +			return ERR_PTR(err); -		if (file->f_flags & O_TRUNC) +		if (file_flags & O_TRUNC)  			err = ovl_copy_up_last(dentry, NULL, true);  		else  			err = ovl_copy_up(dentry); +		ovl_drop_write(dentry);  		if (err) -			goto out_drop_write; +			return ERR_PTR(err);  		ovl_path_upper(dentry, &realpath);  	} -	err = vfs_open(&realpath, file, cred); -out_drop_write: -	if (want_write) -		ovl_drop_write(dentry); -out: -	return err; +	return d_backing_inode(realpath.dentry);  }  static const struct inode_operations ovl_file_inode_operations = { @@ -378,7 +371,6 @@ static const struct inode_operations ovl_file_inode_operations = {  	.getxattr	= ovl_getxattr,  	.listxattr	= ovl_listxattr,  	.removexattr	= ovl_removexattr, -	.dentry_open	= ovl_dentry_open,  };  static const struct inode_operations ovl_symlink_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 17ac5afc9ffb..ea5a40b06e3a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name,  		     void *value, size_t size);  ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);  int ovl_removexattr(struct dentry *dentry, const char *name); +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags);  struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,  			    struct ovl_entry *oe); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 8a08c582bc22..7466ff339c66 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -316,6 +316,7 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)  static const struct dentry_operations ovl_dentry_operations = {  	.d_release = ovl_dentry_release, +	.d_select_inode = ovl_d_select_inode,  };  static const struct dentry_operations ovl_reval_dentry_operations = { diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 84bb65b83570..4fb17ded7d47 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -547,51 
+547,45 @@ posix_acl_create(struct inode *dir, umode_t *mode,  		struct posix_acl **default_acl, struct posix_acl **acl)  {  	struct posix_acl *p; +	struct posix_acl *clone;  	int ret; +	*acl = NULL; +	*default_acl = NULL; +  	if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) -		goto no_acl; +		return 0;  	p = get_acl(dir, ACL_TYPE_DEFAULT); -	if (IS_ERR(p)) { -		if (p == ERR_PTR(-EOPNOTSUPP)) -			goto apply_umask; -		return PTR_ERR(p); +	if (!p || p == ERR_PTR(-EOPNOTSUPP)) { +		*mode &= ~current_umask(); +		return 0;  	} +	if (IS_ERR(p)) +		return PTR_ERR(p); -	if (!p) -		goto apply_umask; - -	*acl = posix_acl_clone(p, GFP_NOFS); -	if (!*acl) +	clone = posix_acl_clone(p, GFP_NOFS); +	if (!clone)  		goto no_mem; -	ret = posix_acl_create_masq(*acl, mode); +	ret = posix_acl_create_masq(clone, mode);  	if (ret < 0)  		goto no_mem_clone; -	if (ret == 0) { -		posix_acl_release(*acl); -		*acl = NULL; -	} +	if (ret == 0) +		posix_acl_release(clone); +	else +		*acl = clone; -	if (!S_ISDIR(*mode)) { +	if (!S_ISDIR(*mode))  		posix_acl_release(p); -		*default_acl = NULL; -	} else { +	else  		*default_acl = p; -	} -	return 0; -apply_umask: -	*mode &= ~current_umask(); -no_acl: -	*default_acl = NULL; -	*acl = NULL;  	return 0;  no_mem_clone: -	posix_acl_release(*acl); +	posix_acl_release(clone);  no_mem:  	posix_acl_release(p);  	return -ENOMEM; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index d4a35746cab9..f8595e8b5cd0 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -64,7 +64,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)  	if (file) {  		seq_pad(m, ' '); -		seq_path(m, &file->f_path, ""); +		seq_file_path(m, file, "");  	}  	seq_putc(m, '\n'); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6dee68d013ff..ca1e091881d4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -310,7 +310,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)  	 */  	if (file) {  		seq_pad(m, ' '); -		seq_path(m, &file->f_path, "\n"); +		seq_file_path(m, file, "\n");  		goto done;  	} @@ -1509,7 +1509,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)  	if (file) {  		seq_puts(m, " file="); -		seq_path(m, &file->f_path, "\n\t= "); +		seq_file_path(m, file, "\n\t= ");  	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {  		seq_puts(m, " heap");  	} else { diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 599ec2e20104..e0d64c92e4f6 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -180,7 +180,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,  	if (file) {  		seq_pad(m, ' '); -		seq_path(m, &file->f_path, ""); +		seq_file_path(m, file, "");  	} else if (mm) {  		pid_t tid = pid_of_stack(priv, vma, is_pid); diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index 8d64bb5366bf..e1f37278cf97 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -32,11 +32,6 @@ static struct page *qnx6_get_page(struct inode *dir, unsigned long n)  	return page;  } -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  static unsigned last_entry(struct inode *inode, unsigned long page_nr)  {  	unsigned long last_byte = inode->i_size; diff --git a/fs/seq_file.c b/fs/seq_file.c index 1d9c1cbd4d0b..ce9e39fd5daf 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -490,6 +490,20 @@ int seq_path(struct seq_file *m, const struct path *path, const char *esc)  }  EXPORT_SYMBOL(seq_path); +/** + * seq_file_path - seq_file 
interface to print a pathname of a file + * @m: the seq_file handle + * @file: the struct file to print + * @esc: set of characters to escape in the output + * + * return the absolute path to the file. + */ +int seq_file_path(struct seq_file *m, struct file *file, const char *esc) +{ +	return seq_path(m, &file->f_path, esc); +} +EXPORT_SYMBOL(seq_file_path); +  /*   * Same as seq_path, but relative to supplied root.   */ diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index 73588e7700ed..d09fcd6fb85d 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h @@ -49,6 +49,6 @@ struct squashfs_inode_info {  static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)  { -	return list_entry(inode, struct squashfs_inode_info, vfs_inode); +	return container_of(inode, struct squashfs_inode_info, vfs_inode);  }  #endif diff --git a/fs/super.c b/fs/super.c index 928c20f47af9..b61372354f2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -842,7 +842,7 @@ int get_anon_bdev(dev_t *p)  	else if (error)  		return -EAGAIN; -	if (dev == (1 << MINORBITS)) { +	if (dev >= (1 << MINORBITS)) {  		spin_lock(&unnamed_dev_lock);  		ida_remove(&unnamed_dev_ida, dev);  		if (unnamed_dev_start > dev) diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 8f3555f00c54..63c1bcb224ee 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -33,11 +33,6 @@ static inline void dir_put_page(struct page *page)  	page_cache_release(page);  } -static inline unsigned long dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)  {  	struct address_space *mapping = page->mapping; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 2c13525131cd..6c212288adcb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -73,7 +73,7 @@ struct sysv_inode_info {  static inline struct sysv_inode_info *SYSV_I(struct inode *inode)  { -	return list_entry(inode, struct sysv_inode_info, vfs_inode); +	return container_of(inode, struct sysv_inode_info, vfs_inode);  }  static inline struct sysv_sb_info *SYSV_SB(struct super_block *sb) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index a43df11a163f..cbc8d5d2755a 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -496,16 +496,11 @@ struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *pare  	return dentry;  } -static inline int tracefs_positive(struct dentry *dentry) -{ -	return dentry->d_inode && !d_unhashed(dentry); -} -  static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)  {  	int ret = 0; -	if (tracefs_positive(dentry)) { +	if (simple_positive(dentry)) {  		if (dentry->d_inode) {  			dget(dentry);  			switch (dentry->d_inode->i_mode & S_IFMT) { @@ -582,7 +577,7 @@ void tracefs_remove_recursive(struct dentry *dentry)  	 */  	spin_lock(&parent->d_lock);  	list_for_each_entry(child, &parent->d_subdirs, d_child) { -		if (!tracefs_positive(child)) +		if (!simple_positive(child))  			continue;  		/* perhaps simple_empty(child) makes more sense */ @@ -603,7 +598,7 @@ void tracefs_remove_recursive(struct dentry *dentry)  		 * from d_subdirs. When releasing the parent->d_lock we can  		 * no longer trust that the next pointer is valid.  		 * Restart the loop. We'll skip this one with the -		 * tracefs_positive() check. +		 * simple_positive() check.  		 
*/  		goto loop;  	} diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index b5cd8ed2aa12..b1b9a63d8cf3 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -56,7 +56,7 @@ struct udf_inode_info {  static inline struct udf_inode_info *UDF_I(struct inode *inode)  { -	return list_entry(inode, struct udf_inode_info, vfs_inode); +	return container_of(inode, struct udf_inode_info, vfs_inode);  }  #endif /* _UDF_I_H) */ diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d52..a7106eda5024 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)  	if (ufs_fragnum(fragment) + count > uspi->s_fpg)  		ufs_error (sb, "ufs_free_fragments", "internal error"); -	 -	lock_ufs(sb); + +	mutex_lock(&UFS_SB(sb)->s_lock);  	cgno = ufs_dtog(uspi, fragment);  	bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)  	if (sb->s_flags & MS_SYNCHRONOUS)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); -	 -	unlock_ufs(sb); + +	mutex_unlock(&UFS_SB(sb)->s_lock);  	UFSD("EXIT\n");  	return;  failed: -	unlock_ufs(sb); +	mutex_unlock(&UFS_SB(sb)->s_lock);  	UFSD("EXIT (FAILED)\n");  	return;  } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)  		goto failed;  	} -	lock_ufs(sb); +	mutex_lock(&UFS_SB(sb)->s_lock);  do_more:  	overflow = 0; @@ -211,12 +211,12 @@ do_more:  	}  	ufs_mark_sb_dirty(sb); -	unlock_ufs(sb); +	mutex_unlock(&UFS_SB(sb)->s_lock);  	UFSD("EXIT\n");  	return;  failed_unlock: -	unlock_ufs(sb); +	mutex_unlock(&UFS_SB(sb)->s_lock);  failed:  	UFSD("EXIT (FAILED)\n");  	return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  	usb1 = ubh_get_usb_first(uspi);  	*err = -ENOSPC; -	lock_ufs(sb); +	mutex_lock(&UFS_SB(sb)->s_lock);  	tmp = ufs_data_ptr_to_cpu(sb, p);  	if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  				  "fragment %llu, tmp %llu\n",  				  (unsigned long long)fragment,  				  (unsigned long long)tmp); -			unlock_ufs(sb); +			mutex_unlock(&UFS_SB(sb)->s_lock);  			return INVBLOCK;  		}  		if (fragment < UFS_I(inode)->i_lastfrag) {  			UFSD("EXIT (ALREADY ALLOCATED)\n"); -			unlock_ufs(sb); +			mutex_unlock(&UFS_SB(sb)->s_lock);  			return 0;  		}  	}  	else {  		if (tmp) {  			UFSD("EXIT (ALREADY ALLOCATED)\n"); -			unlock_ufs(sb); +			mutex_unlock(&UFS_SB(sb)->s_lock);  			return 0;  		}  	} @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  	 * There is not enough space for user on the device  	 */  	if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		UFSD("EXIT (FAILED)\n");  		return 0;  	} @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  			ufs_clear_frags(inode, result + oldcount,  					newcount - oldcount, locked_page != NULL);  		} -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		UFSD("EXIT, result %llu\n", (unsigned long long)result);  		return result;  	} @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  						fragment + count);  		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,  				locked_page != NULL); -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		UFSD("EXIT, result %llu\n", (unsigned long long)result);  		
return result;  	} @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  		*err = 0;  		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,  						fragment + count); -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		if (newcount < request)  			ufs_free_fragments (inode, result + newcount, request - newcount);  		ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,  		return result;  	} -	unlock_ufs(sb); +	mutex_unlock(&UFS_SB(sb)->s_lock);  	UFSD("EXIT (FAILED)\n");  	return 0;  }		 diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 1bfe8cabff0f..74f2e80288bf 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -65,11 +65,6 @@ static inline void ufs_put_page(struct page *page)  	page_cache_release(page);  } -static inline unsigned long ufs_dir_pages(struct inode *inode) -{ -	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} -  ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)  {  	ino_t res = 0; @@ -87,7 +82,8 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)  /* Releases the page */  void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, -		  struct page *page, struct inode *inode) +		  struct page *page, struct inode *inode, +		  bool update_times)  {  	loff_t pos = page_offset(page) +  			(char *) de - (char *) page_address(page); @@ -103,7 +99,8 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,  	err = ufs_commit_chunk(page, pos, len);  	ufs_put_page(page); -	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; +	if (update_times) +		dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;  	mark_inode_dirty(dir);  } @@ -256,7 +253,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,  	int namelen = qstr->len;  	unsigned reclen = UFS_DIR_REC_LEN(namelen);  	unsigned long start, n; -	unsigned long npages = ufs_dir_pages(dir); +	unsigned long npages = dir_pages(dir);  	struct page *page = NULL;  	struct ufs_inode_info *ui = UFS_I(dir);  	struct ufs_dir_entry *de; @@ -320,7 +317,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)  	unsigned short rec_len, name_len;  	struct page *page = NULL;  	struct ufs_dir_entry *de; -	unsigned long npages = ufs_dir_pages(dir); +	unsigned long npages = dir_pages(dir);  	unsigned long n;  	char *kaddr;  	loff_t pos; @@ -437,7 +434,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)  	struct super_block *sb = inode->i_sb;  	unsigned int offset = pos & ~PAGE_CACHE_MASK;  	unsigned long n = pos >> PAGE_CACHE_SHIFT; -	unsigned long npages = ufs_dir_pages(inode); +	unsigned long npages = dir_pages(inode);  	unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);  	int need_revalidate = file->f_version != inode->i_version;  	unsigned flags = UFS_SB(sb)->s_flags; @@ -608,7 +605,7 @@ int ufs_empty_dir(struct inode * inode)  {  	struct super_block *sb = inode->i_sb;  	struct page *page = NULL; -	unsigned long i, npages = ufs_dir_pages(inode); +	unsigned long i, npages = dir_pages(inode);  	for (i = 0; i < npages; i++) {  		char *kaddr; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa01652888..fd0203ce1f7f 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode)  	ino = inode->i_ino; -	lock_ufs(sb); +	mutex_lock(&UFS_SB(sb)->s_lock);  	if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) {  		ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent 
inode %u\n", ino); -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		return;  	} @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode)  	bit = ufs_inotocgoff (ino);  	ucpi = ufs_load_cylinder (sb, cg);  	if (!ucpi) { -		unlock_ufs(sb); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		return;  	}  	ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode)  		ubh_sync_block(UCPI_UBH(ucpi));  	ufs_mark_sb_dirty(sb); -	unlock_ufs(sb); +	mutex_unlock(&UFS_SB(sb)->s_lock);  	UFSD("EXIT\n");  } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)  	sbi = UFS_SB(sb);  	uspi = sbi->s_uspi; -	lock_ufs(sb); +	mutex_lock(&sbi->s_lock);  	/*  	 * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ cg_found:  			sync_dirty_buffer(bh);  		brelse(bh);  	} -	unlock_ufs(sb); +	mutex_unlock(&sbi->s_lock);  	UFSD("allocating inode %lu\n", inode->i_ino);  	UFSD("EXIT\n");  	return inode;  fail_remove_inode: -	unlock_ufs(sb); +	mutex_unlock(&sbi->s_lock);  	clear_nlink(inode);  	unlock_new_inode(inode);  	iput(inode);  	UFSD("EXIT (FAILED): err %d\n", err);  	return ERR_PTR(err);  failed: -	unlock_ufs(sb); +	mutex_unlock(&sbi->s_lock);  	make_bad_inode(inode);  	iput (inode);  	UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 99aaf5c9bf4d..f913a6924b23 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -903,6 +903,9 @@ void ufs_evict_inode(struct inode * inode)  	invalidate_inode_buffers(inode);  	clear_inode(inode); -	if (want_delete) +	if (want_delete) { +		lock_ufs(inode->i_sb);  		ufs_free_inode(inode); +		unlock_ufs(inode->i_sb); +	}  } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index f773deb1d2e3..47966554317c 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -56,11 +56,9 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi  	if (dentry->d_name.len > UFS_MAXNAMLEN)  		return ERR_PTR(-ENAMETOOLONG); -	lock_ufs(dir->i_sb);  	ino = ufs_inode_by_name(dir, &dentry->d_name);  	if (ino)  		inode = ufs_iget(dir->i_sb, ino); -	unlock_ufs(dir->i_sb);  	return d_splice_alias(inode, dentry);  } @@ -76,24 +74,16 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,  		bool excl)  {  	struct inode *inode; -	int err; - -	UFSD("BEGIN\n");  	inode = ufs_new_inode(dir, mode); -	err = PTR_ERR(inode); +	if (IS_ERR(inode)) +		return PTR_ERR(inode); -	if (!IS_ERR(inode)) { -		inode->i_op = &ufs_file_inode_operations; -		inode->i_fop = &ufs_file_operations; -		inode->i_mapping->a_ops = &ufs_aops; -		mark_inode_dirty(inode); -		lock_ufs(dir->i_sb); -		err = ufs_add_nondir(dentry, inode); -		unlock_ufs(dir->i_sb); -	} -	UFSD("END: err=%d\n", err); -	return err; +	inode->i_op = &ufs_file_inode_operations; +	inode->i_fop = &ufs_file_operations; +	inode->i_mapping->a_ops = &ufs_aops; +	mark_inode_dirty(inode); +	return ufs_add_nondir(dentry, inode);  }  static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) @@ -110,9 +100,7 @@ static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev  		init_special_inode(inode, mode, rdev);  		ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev);  		mark_inode_dirty(inode); -		lock_ufs(dir->i_sb);  		err = ufs_add_nondir(dentry, inode); -		unlock_ufs(dir->i_sb);  	}  	return err;  } @@ -121,19 +109,18 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,  	const char * symname)  {  	struct super_block * sb = dir->i_sb; -	int err = 
-ENAMETOOLONG; +	int err;  	unsigned l = strlen(symname)+1;  	struct inode * inode;  	if (l > sb->s_blocksize) -		goto out_notlocked; +		return -ENAMETOOLONG;  	inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);  	err = PTR_ERR(inode);  	if (IS_ERR(inode)) -		goto out_notlocked; +		return err; -	lock_ufs(dir->i_sb);  	if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {  		/* slow symlink */  		inode->i_op = &ufs_symlink_inode_operations; @@ -150,17 +137,13 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,  	}  	mark_inode_dirty(inode); -	err = ufs_add_nondir(dentry, inode); -out: -	unlock_ufs(dir->i_sb); -out_notlocked: -	return err; +	return ufs_add_nondir(dentry, inode);  out_fail:  	inode_dec_link_count(inode);  	unlock_new_inode(inode);  	iput(inode); -	goto out; +	return err;  }  static int ufs_link (struct dentry * old_dentry, struct inode * dir, @@ -169,14 +152,16 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,  	struct inode *inode = d_inode(old_dentry);  	int error; -	lock_ufs(dir->i_sb); -  	inode->i_ctime = CURRENT_TIME_SEC;  	inode_inc_link_count(inode);  	ihold(inode); -	error = ufs_add_nondir(dentry, inode); -	unlock_ufs(dir->i_sb); +	error = ufs_add_link(dentry, inode); +	if (error) { +		inode_dec_link_count(inode); +		iput(inode); +	} else +		d_instantiate(dentry, inode);  	return error;  } @@ -185,9 +170,12 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)  	struct inode * inode;  	int err; +	inode_inc_link_count(dir); +  	inode = ufs_new_inode(dir, S_IFDIR|mode); +	err = PTR_ERR(inode);  	if (IS_ERR(inode)) -		return PTR_ERR(inode); +		goto out_dir;  	inode->i_op = &ufs_dir_inode_operations;  	inode->i_fop = &ufs_dir_operations; @@ -195,9 +183,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)  	inode_inc_link_count(inode); -	lock_ufs(dir->i_sb); -	inode_inc_link_count(dir); -  	err = ufs_make_empty(inode, dir);  	if (err)  		goto out_fail; @@ -205,20 +190,19 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)  	err = ufs_add_link(dentry, inode);  	if (err)  		goto out_fail; -	unlock_ufs(dir->i_sb); +	unlock_new_inode(inode);  	d_instantiate(dentry, inode); -out: -	return err; +	return 0;  out_fail:  	inode_dec_link_count(inode);  	inode_dec_link_count(inode);  	unlock_new_inode(inode);  	iput (inode); +out_dir:  	inode_dec_link_count(dir); -	unlock_ufs(dir->i_sb); -	goto out; +	return err;  }  static int ufs_unlink(struct inode *dir, struct dentry *dentry) @@ -248,7 +232,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)  	struct inode * inode = d_inode(dentry);  	int err= -ENOTEMPTY; -	lock_ufs(dir->i_sb);  	if (ufs_empty_dir (inode)) {  		err = ufs_unlink(dir, dentry);  		if (!err) { @@ -257,7 +240,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)  			inode_dec_link_count(dir);  		}  	} -	unlock_ufs(dir->i_sb);  	return err;  } @@ -295,7 +277,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,  		new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);  		if (!new_de)  			goto out_dir; -		ufs_set_link(new_dir, new_de, new_page, old_inode); +		ufs_set_link(new_dir, new_de, new_page, old_inode, 1);  		new_inode->i_ctime = CURRENT_TIME_SEC;  		if (dir_de)  			drop_nlink(new_inode); @@ -318,7 +300,12 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,  	mark_inode_dirty(old_inode);  	if (dir_de) { -		ufs_set_link(old_inode, dir_de, dir_page, 
new_dir); +		if (old_dir != new_dir) +			ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0); +		else { +			kunmap(dir_page); +			page_cache_release(dir_page); +		}  		inode_dec_link_count(old_dir);  	}  	return 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 098508a93c7b..250579a80d90 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -695,6 +695,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait)  	unsigned flags;  	lock_ufs(sb); +	mutex_lock(&UFS_SB(sb)->s_lock);  	UFSD("ENTER\n"); @@ -712,6 +713,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait)  	ufs_put_cstotal(sb);  	UFSD("EXIT\n"); +	mutex_unlock(&UFS_SB(sb)->s_lock);  	unlock_ufs(sb);  	return 0; @@ -800,6 +802,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)  	UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));  	mutex_init(&sbi->mutex); +	mutex_init(&sbi->s_lock);  	spin_lock_init(&sbi->work_lock);  	INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);  	/* @@ -1278,6 +1281,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  	sync_filesystem(sb);  	lock_ufs(sb); +	mutex_lock(&UFS_SB(sb)->s_lock);  	uspi = UFS_SB(sb)->s_uspi;  	flags = UFS_SB(sb)->s_flags;  	usb1 = ubh_get_usb_first(uspi); @@ -1291,6 +1295,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  	new_mount_opt = 0;  	ufs_set_opt (new_mount_opt, ONERROR_LOCK);  	if (!ufs_parse_options (data, &new_mount_opt)) { +		mutex_unlock(&UFS_SB(sb)->s_lock);  		unlock_ufs(sb);  		return -EINVAL;  	} @@ -1298,12 +1303,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  		new_mount_opt |= ufstype;  	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {  		pr_err("ufstype can't be changed during remount\n"); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		unlock_ufs(sb);  		return -EINVAL;  	}  	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {  		UFS_SB(sb)->s_mount_opt = new_mount_opt; +		mutex_unlock(&UFS_SB(sb)->s_lock);  		unlock_ufs(sb);  		return 0;  	} @@ -1327,6 +1334,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  	 */  #ifndef CONFIG_UFS_FS_WRITE  		pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); +		mutex_unlock(&UFS_SB(sb)->s_lock);  		unlock_ufs(sb);  		return -EINVAL;  #else @@ -1336,11 +1344,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  		    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&  		    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {  			pr_err("this ufstype is read-only supported\n"); +			mutex_unlock(&UFS_SB(sb)->s_lock);  			unlock_ufs(sb);  			return -EINVAL;  		}  		if (!ufs_read_cylinder_structures(sb)) {  			pr_err("failed during remounting\n"); +			mutex_unlock(&UFS_SB(sb)->s_lock);  			unlock_ufs(sb);  			return -EPERM;  		} @@ -1348,6 +1358,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)  #endif  	}  	UFS_SB(sb)->s_mount_opt = new_mount_opt; +	mutex_unlock(&UFS_SB(sb)->s_lock);  	unlock_ufs(sb);  	return 0;  } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9e..2e31ea2e35a3 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info {  	int work_queued; /* non-zero if the delayed work is queued */  	struct delayed_work sync_work; /* FS sync delayed work */  	spinlock_t work_lock; /* protects sync_work and work_queued */ +	struct mutex s_lock;  };  struct ufs_inode_info { @@ -105,7 +106,7 @@ extern int ufs_delete_entry(struct 
inode *, struct ufs_dir_entry *, struct page  extern int ufs_empty_dir (struct inode *);  extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);  extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, -			 struct page *page, struct inode *inode); +			 struct page *page, struct inode *inode, bool update_times);  /* file.c */  extern const struct inode_operations ufs_file_inode_operations; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 874507de3485..f0e8249722d4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -577,6 +577,13 @@ restart:  	if (error)  		return error; +	/* For changing security info in file_remove_privs() we need i_mutex */ +	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) { +		xfs_rw_iunlock(ip, *iolock); +		*iolock = XFS_IOLOCK_EXCL; +		xfs_rw_ilock(ip, *iolock); +		goto restart; +	}  	/*  	 * If the offset is beyond the size of the file, we need to zero any  	 * blocks that fall between the existing EOF and the start of this @@ -637,7 +644,9 @@ restart:  	 * setgid bits if the process is not being run by root.  This keeps  	 * people from modifying setuid and setgid binaries.  	 */ -	return file_remove_suid(file); +	if (!IS_NOSEC(inode)) +		return file_remove_privs(file); +	return 0;  }  /* diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations {  	char *(*d_dname)(struct dentry *, char *, int);  	struct vfsmount *(*d_automount)(struct path *);  	int (*d_manage)(struct dentry *, bool); +	struct inode *(*d_select_inode)(struct dentry *, unsigned);  } ____cacheline_aligned;  /* @@ -225,6 +226,7 @@ struct dentry_operations {  #define DCACHE_MAY_FREE			0x00800000  #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE		0x02000000 /* Unioned entry: dcache op selects inode */  extern seqlock_t rename_lock; @@ -505,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry)  	return dentry->d_inode != NULL;  } +static inline int simple_positive(struct dentry *dentry) +{ +	return d_really_is_positive(dentry) && !d_unhashed(dentry); +} +  extern void d_set_fallthru(struct dentry *dentry);  static inline bool d_is_fallthru(const struct dentry *dentry) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct {     * read mostly part     */  	atomic_t count; +	bool resize_in_progress; +	wait_queue_head_t resize_wait; +  	struct fdtable __rcu *fdt;  	struct fdtable fdtab;    /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 8a81fcbb0074..a0653e560c26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1654,7 +1654,6 @@ struct inode_operations {  	int (*set_acl)(struct inode *, struct posix_acl *, int);  	/* WARNING: probably going away soon, do not use! 
*/ -	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);  } ____cacheline_aligned;  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2213,7 +2212,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t);  extern struct file *filp_open(const char *, int, umode_t);  extern struct file *file_open_root(struct dentry *, struct vfsmount *,  				   const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *);  extern struct file * dentry_open(const struct path *, int, const struct cred *);  extern int filp_close(struct file *, fl_owner_t id); @@ -2530,6 +2528,8 @@ extern struct file * open_exec(const char *);  extern int is_subdir(struct dentry *, struct dentry *);  extern int path_is_under(struct path *, struct path *); +extern char *file_path(struct file *, char *, int); +  #include <linux/err.h>  /* needed for stackable file system support */ @@ -2581,7 +2581,12 @@ extern struct inode *new_inode_pseudo(struct super_block *sb);  extern struct inode *new_inode(struct super_block *sb);  extern void free_inode_nonrcu(struct inode *inode);  extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); +extern int file_remove_privs(struct file *); +extern int dentry_needs_remove_privs(struct dentry *dentry); +static inline int file_needs_remove_privs(struct file *file) +{ +	return dentry_needs_remove_privs(file->f_path.dentry); +}  extern void __insert_inode_hash(struct inode *, unsigned long hashval);  static inline void insert_inode_hash(struct inode *inode) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 771484993ca7..604e1526cd00 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ extern wait_queue_head_t fscache_cache_cleared_wq;   */  typedef void (*fscache_operation_release_t)(struct fscache_operation *op);  typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op);  enum fscache_operation_state {  	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */ @@ -109,6 +110,9 @@ struct fscache_operation {  	 *   the op in a non-pool thread */  	fscache_operation_processor_t processor; +	/* Operation cancellation cleanup (optional) */ +	fscache_operation_cancel_t cancel; +  	/* operation releaser */  	fscache_operation_release_t release;  }; @@ -119,33 +123,17 @@ extern void fscache_op_work_func(struct work_struct *work);  extern void fscache_enqueue_operation(struct fscache_operation *);  extern void fscache_op_complete(struct fscache_operation *, bool);  extern void fscache_put_operation(struct fscache_operation *); - -/** - * fscache_operation_init - Do basic initialisation of an operation - * @op: The operation to initialise - * @release: The release function to assign - * - * Do basic initialisation of an operation.  The caller must still set flags, - * object and processor if needed. 
- */ -static inline void fscache_operation_init(struct fscache_operation *op, -					fscache_operation_processor_t processor, -					fscache_operation_release_t release) -{ -	INIT_WORK(&op->work, fscache_op_work_func); -	atomic_set(&op->usage, 1); -	op->state = FSCACHE_OP_ST_INITIALISED; -	op->debug_id = atomic_inc_return(&fscache_op_debug_id); -	op->processor = processor; -	op->release = release; -	INIT_LIST_HEAD(&op->pend_link); -} +extern void fscache_operation_init(struct fscache_operation *, +				   fscache_operation_processor_t, +				   fscache_operation_cancel_t, +				   fscache_operation_release_t);  /*   * data read operation   */  struct fscache_retrieval {  	struct fscache_operation op; +	struct fscache_cookie	*cookie;	/* The netfs cookie */  	struct address_space	*mapping;	/* netfs pages */  	fscache_rw_complete_t	end_io_func;	/* function to call on I/O completion */  	void			*context;	/* netfs read context (pinned) */ @@ -371,6 +359,7 @@ struct fscache_object {  #define FSCACHE_OBJECT_IS_LOOKED_UP	4	/* T if object has been looked up */  #define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */  #define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */ +#define FSCACHE_OBJECT_KILLED_BY_CACHE	7	/* T if object was killed by the cache */  	struct list_head	cache_link;	/* link in cache->object_list */  	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */ @@ -410,17 +399,16 @@ static inline bool fscache_object_is_available(struct fscache_object *object)  	return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags);  } -static inline bool fscache_object_is_active(struct fscache_object *object) +static inline bool fscache_cache_is_broken(struct fscache_object *object)  { -	return fscache_object_is_available(object) && -		fscache_object_is_live(object) && -		!test_bit(FSCACHE_IOERROR, &object->cache->flags); +	return test_bit(FSCACHE_IOERROR, &object->cache->flags);  } -static inline bool fscache_object_is_dead(struct fscache_object *object) +static inline bool fscache_object_is_active(struct fscache_object *object)  { -	return fscache_object_is_dying(object) && -		test_bit(FSCACHE_IOERROR, &object->cache->flags); +	return fscache_object_is_available(object) && +		fscache_object_is_live(object) && +		!fscache_cache_is_broken(object);  }  /** @@ -551,4 +539,15 @@ extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object,  					       const void *data,  					       uint16_t datalen); +extern void fscache_object_retrying_stale(struct fscache_object *object); + +enum fscache_why_object_killed { +	FSCACHE_OBJECT_IS_STALE, +	FSCACHE_OBJECT_NO_SPACE, +	FSCACHE_OBJECT_WAS_RETIRED, +	FSCACHE_OBJECT_WAS_CULLED, +}; +extern void fscache_object_mark_killed(struct fscache_object *object, +				       enum fscache_why_object_killed why); +  #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fb0814ca65c7..a6c78e00ea96 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -671,4 +671,10 @@ static inline int add_to_page_cache(struct page *page,  	return error;  } +static inline unsigned long dir_pages(struct inode *inode) +{ +	return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> +			       PAGE_CACHE_SHIFT; +} +  #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c77..912a7c482649 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,7 @@ __printf(2, 3) int 
seq_printf(struct seq_file *, const char *, ...);  __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);  int seq_path(struct seq_file *, const struct path *, const char *); +int seq_file_path(struct seq_file *, struct file *, const char *);  int seq_dentry(struct seq_file *, struct dentry *, const char *);  int seq_path_root(struct seq_file *m, const struct path *path,  		  const struct path *root, const char *esc); diff --git a/kernel/events/core.c b/kernel/events/core.c index d1f37ddd1960..e965cfae4207 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5794,7 +5794,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)  		 * need to add enough zero bytes after the string to handle  		 * the 64bit alignment we do later.  		 */ -		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); +		name = file_path(file, buf, PATH_MAX - sizeof(u64));  		if (IS_ERR(name)) {  			name = "//toolong";  			goto cpy_name; diff --git a/mm/filemap.c b/mm/filemap.c index 11f10efd637c..1283fc825458 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2563,7 +2563,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  	/* We can write back this queue in page reclaim */  	current->backing_dev_info = inode_to_bdi(inode); -	err = file_remove_suid(file); +	err = file_remove_privs(file);  	if (err)  		goto out; diff --git a/mm/memory.c b/mm/memory.c index 11b9ca176740..a84fbb772034 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3726,7 +3726,7 @@ void print_vma_addr(char *prefix, unsigned long ip)  		if (buf) {  			char *p; -			p = d_path(&f->f_path, buf, PAGE_SIZE); +			p = file_path(f, buf, PAGE_SIZE);  			if (IS_ERR(p))  				p = "?";  			printk("%s%s[%lx+%lx]", prefix, kbasename(p), diff --git a/mm/swapfile.c b/mm/swapfile.c index a7e72103f23b..41e4581af7c5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2032,7 +2032,7 @@ static int swap_show(struct seq_file *swap, void *v)  	}  	file = si->swap_file; -	len = seq_path(swap, &file->f_path, " \t\n\\"); +	len = seq_file_path(swap, file, " \t\n\\");  	seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",  			len < 40 ? 40 - len : 1, " ",  			S_ISBLK(file_inode(file)->i_mode) ? 
diff --git a/net/9p/client.c b/net/9p/client.c index 6f4c4c88db84..498454b3c06c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,  	if (err < 0) {  		if (err == -EIO)  			c->status = Disconnected; -		goto reterr; +		if (err != -ERESTARTSYS) +			goto reterr;  	}  	if (req->status == REQ_STATUS_ERROR) {  		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); @@ -1582,6 +1583,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)  			p9_free_req(clnt, req);  			break;  		} +		if (rsize < count) { +			pr_err("bogus RREAD count (%d > %d)\n", count, rsize); +			count = rsize; +		}  		p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);  		if (!count) { @@ -1647,6 +1652,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)  		if (*err) {  			trace_9p_protocol_dump(clnt, req->rc);  			p9_free_req(clnt, req); +			break; +		} +		if (rsize < count) { +			pr_err("bogus RWRITE count (%d > %d)\n", count, rsize); +			count = rsize;  		}  		p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); diff --git a/security/inode.c b/security/inode.c index 0e37e4fba8fa..16622aef9bde 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,11 +25,6 @@  static struct vfsmount *mount;  static int mount_count; -static inline int positive(struct dentry *dentry) -{ -	return d_really_is_positive(dentry) && !d_unhashed(dentry); -} -  static int fill_super(struct super_block *sb, void *data, int silent)  {  	static struct tree_descr files[] = {{""}}; @@ -201,14 +196,12 @@ void securityfs_remove(struct dentry *dentry)  		return;  	mutex_lock(&d_inode(parent)->i_mutex); -	if (positive(dentry)) { -		if (d_really_is_positive(dentry)) { -			if (d_is_dir(dentry)) -				simple_rmdir(d_inode(parent), dentry); -			else -				simple_unlink(d_inode(parent), dentry); -			dput(dentry); -		} +	if (simple_positive(dentry)) { +		if (d_is_dir(dentry)) +			simple_rmdir(d_inode(parent), dentry); +		else +			simple_unlink(d_inode(parent), dentry); +		dput(dentry);  	}  	mutex_unlock(&d_inode(parent)->i_mutex);  	simple_release_fs(&mount, &mount_count); |
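
For reference, three minimal sketches of the patterns this part of the diff converges on. Every structure and function name beginning with "example" is invented purely for illustration and does not appear in the patches; only the kernel APIs visible in the hunks above (container_of(), simple_positive(), fscache_operation_init(), fscache_put_operation()) are assumed.

1) The list_entry() -> container_of() conversions in hfs, hfsplus, hpfs, jffs2, jfs, minix, ntfs, squashfs, sysv and udf all recover a filesystem-private inode structure from its embedded struct inode. A hypothetical filesystem following the same pattern might look like:

#include <linux/fs.h>
#include <linux/kernel.h>	/* container_of() */

struct examplefs_inode_info {
	unsigned long	ei_flags;	/* fs-private state */
	struct inode	vfs_inode;	/* embedded, not a pointer */
};

static inline struct examplefs_inode_info *EXAMPLEFS_I(struct inode *inode)
{
	return container_of(inode, struct examplefs_inode_info, vfs_inode);
}

2) spufs, hypfs, nfs, tracefs and securityfs now use simple_positive() from <linux/dcache.h> instead of open-coded "positive and not unhashed" checks. A pseudo-filesystem removal helper in that style, assuming the parent-i_mutex locking used throughout this kernel version, could be sketched as:

#include <linux/dcache.h>
#include <linux/fs.h>

static void examplefs_remove(struct dentry *dentry)
{
	struct dentry *parent = dentry->d_parent;

	mutex_lock(&d_inode(parent)->i_mutex);
	if (simple_positive(dentry)) {
		if (d_is_dir(dentry))
			simple_rmdir(d_inode(parent), dentry);
		else
			simple_unlink(d_inode(parent), dentry);
		dput(dentry);
	}
	mutex_unlock(&d_inode(parent)->i_mutex);
}

3) fscache_operation_init() now takes an optional cancellation callback between the processor and release callbacks. A caller might be wired up roughly as follows; the callback bodies are placeholders:

#include <linux/fscache-cache.h>

static void example_op_cancel(struct fscache_operation *op)
{
	/* undo bookkeeping done before the op was submitted */
}

static void example_op_release(struct fscache_operation *op)
{
	/* drop resources pinned by the op; called on the final put */
}

static void example_init_op(struct fscache_operation *op)
{
	fscache_operation_init(op, NULL, example_op_cancel,
			       example_op_release);
	op->flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING);
}

On error paths after initialisation, the hunks above drop such operations with fscache_put_operation() rather than kfree(), so the release callback (and the ini=/rel= accounting added to the stats) sees every op exactly once.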