diff options
Diffstat (limited to 'fs')
39 files changed, 790 insertions, 215 deletions
@@ -141,6 +141,7 @@ struct kioctx { struct { unsigned tail; + unsigned completed_events; spinlock_t completion_lock; } ____cacheline_aligned_in_smp; @@ -857,6 +858,68 @@ out: return ret; } +/* refill_reqs_available + * Updates the reqs_available reference counts used for tracking the + * number of free slots in the completion ring. This can be called + * from aio_complete() (to optimistically update reqs_available) or + * from aio_get_req() (the we're out of events case). It must be + * called holding ctx->completion_lock. + */ +static void refill_reqs_available(struct kioctx *ctx, unsigned head, + unsigned tail) +{ + unsigned events_in_ring, completed; + + /* Clamp head since userland can write to it. */ + head %= ctx->nr_events; + if (head <= tail) + events_in_ring = tail - head; + else + events_in_ring = ctx->nr_events - (head - tail); + + completed = ctx->completed_events; + if (events_in_ring < completed) + completed -= events_in_ring; + else + completed = 0; + + if (!completed) + return; + + ctx->completed_events -= completed; + put_reqs_available(ctx, completed); +} + +/* user_refill_reqs_available + * Called to refill reqs_available when aio_get_req() encounters an + * out of space in the completion ring. + */ +static void user_refill_reqs_available(struct kioctx *ctx) +{ + spin_lock_irq(&ctx->completion_lock); + if (ctx->completed_events) { + struct aio_ring *ring; + unsigned head; + + /* Access of ring->head may race with aio_read_events_ring() + * here, but that's okay since whether we read the old version + * or the new version, and either will be valid. The important + * part is that head cannot pass tail since we prevent + * aio_complete() from updating tail by holding + * ctx->completion_lock. Even if head is invalid, the check + * against ctx->completed_events below will make sure we do the + * safe/right thing. + */ + ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; + kunmap_atomic(ring); + + refill_reqs_available(ctx, head, ctx->tail); + } + + spin_unlock_irq(&ctx->completion_lock); +} + /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. @@ -865,8 +928,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) { struct kiocb *req; - if (!get_reqs_available(ctx)) - return NULL; + if (!get_reqs_available(ctx)) { + user_refill_reqs_available(ctx); + if (!get_reqs_available(ctx)) + return NULL; + } req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); if (unlikely(!req)) @@ -925,8 +991,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2) struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; + unsigned tail, pos, head; unsigned long flags; - unsigned tail, pos; /* * Special case handling for sync iocbs: @@ -987,10 +1053,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ctx->tail = tail; ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; ring->tail = tail; kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + ctx->completed_events++; + if (ctx->completed_events > 1) + refill_reqs_available(ctx, head, tail); spin_unlock_irqrestore(&ctx->completion_lock, flags); pr_debug("added to ring %p at [%u]\n", iocb, tail); @@ -1005,7 +1075,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* everything turned out well, dispose of the aiocb. */ kiocb_free(iocb); - put_reqs_available(ctx, 1); /* * We have to order our ring_info tail store above and test diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5a201d81049c..fbd76ded9a34 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -22,7 +22,6 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/freezer.h> -#include <linux/workqueue.h> #include "async-thread.h" #include "ctree.h" @@ -55,8 +54,39 @@ struct btrfs_workqueue { struct __btrfs_workqueue *high; }; -static inline struct __btrfs_workqueue -*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, +static void normal_work_helper(struct btrfs_work *work); + +#define BTRFS_WORK_HELPER(name) \ +void btrfs_##name(struct work_struct *arg) \ +{ \ + struct btrfs_work *work = container_of(arg, struct btrfs_work, \ + normal_work); \ + normal_work_helper(work); \ +} + +BTRFS_WORK_HELPER(worker_helper); +BTRFS_WORK_HELPER(delalloc_helper); +BTRFS_WORK_HELPER(flush_delalloc_helper); +BTRFS_WORK_HELPER(cache_helper); +BTRFS_WORK_HELPER(submit_helper); +BTRFS_WORK_HELPER(fixup_helper); +BTRFS_WORK_HELPER(endio_helper); +BTRFS_WORK_HELPER(endio_meta_helper); +BTRFS_WORK_HELPER(endio_meta_write_helper); +BTRFS_WORK_HELPER(endio_raid56_helper); +BTRFS_WORK_HELPER(rmw_helper); +BTRFS_WORK_HELPER(endio_write_helper); +BTRFS_WORK_HELPER(freespace_write_helper); +BTRFS_WORK_HELPER(delayed_meta_helper); +BTRFS_WORK_HELPER(readahead_helper); +BTRFS_WORK_HELPER(qgroup_rescan_helper); +BTRFS_WORK_HELPER(extent_refs_helper); +BTRFS_WORK_HELPER(scrub_helper); +BTRFS_WORK_HELPER(scrubwrc_helper); +BTRFS_WORK_HELPER(scrubnc_helper); + +static struct __btrfs_workqueue * +__btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); } -static void normal_work_helper(struct work_struct *arg) +static void normal_work_helper(struct btrfs_work *work) { - struct btrfs_work *work; struct __btrfs_workqueue *wq; int need_order = 0; - work = container_of(arg, struct btrfs_work, normal_work); /* * We should not touch things inside work in the following cases: * 1) after work->func() if it has no ordered_free @@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg) trace_btrfs_all_work_done(work); } -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free) @@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work, work->func = func; work->ordered_func = ordered_func; work->ordered_free = ordered_free; - INIT_WORK(&work->normal_work, normal_work_helper); + INIT_WORK(&work->normal_work, uniq_func); INIT_LIST_HEAD(&work->ordered_list); work->flags = 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 9c6b66d15fb0..e9e31c94758f 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -19,12 +19,14 @@ #ifndef __BTRFS_ASYNC_THREAD_ #define __BTRFS_ASYNC_THREAD_ +#include <linux/workqueue.h> struct btrfs_workqueue; /* Internal use only */ struct __btrfs_workqueue; struct btrfs_work; typedef void (*btrfs_func_t)(struct btrfs_work *arg); +typedef void (*btrfs_work_func_t)(struct work_struct *arg); struct btrfs_work { btrfs_func_t func; @@ -38,11 +40,35 @@ struct btrfs_work { unsigned long flags; }; +#define BTRFS_WORK_HELPER_PROTO(name) \ +void btrfs_##name(struct work_struct *arg) + +BTRFS_WORK_HELPER_PROTO(worker_helper); +BTRFS_WORK_HELPER_PROTO(delalloc_helper); +BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper); +BTRFS_WORK_HELPER_PROTO(cache_helper); +BTRFS_WORK_HELPER_PROTO(submit_helper); +BTRFS_WORK_HELPER_PROTO(fixup_helper); +BTRFS_WORK_HELPER_PROTO(endio_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); +BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); +BTRFS_WORK_HELPER_PROTO(rmw_helper); +BTRFS_WORK_HELPER_PROTO(endio_write_helper); +BTRFS_WORK_HELPER_PROTO(freespace_write_helper); +BTRFS_WORK_HELPER_PROTO(delayed_meta_helper); +BTRFS_WORK_HELPER_PROTO(readahead_helper); +BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper); +BTRFS_WORK_HELPER_PROTO(extent_refs_helper); +BTRFS_WORK_HELPER_PROTO(scrub_helper); +BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); +BTRFS_WORK_HELPER_PROTO(scrubnc_helper); + struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh); -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index da775bfdebc9..a2e90f855d7d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, - NULL, NULL); + btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, + btrfs_async_run_delayed_root, NULL, NULL); async_work->nr = nr; btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7d..a1d36e62179c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -39,7 +39,6 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" -#include "async-thread.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" @@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; fs_info = end_io_wq->info; end_io_wq->error = err; - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); if (bio->bi_rw & REQ_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - btrfs_queue_work(fs_info->endio_meta_write_workers, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - btrfs_queue_work(fs_info->endio_freespace_worker, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_write_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { + wq = fs_info->endio_meta_write_workers; + func = btrfs_endio_meta_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) { + wq = fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else { + wq = fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else if (end_io_wq->metadata) - btrfs_queue_work(fs_info->endio_meta_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else if (end_io_wq->metadata) { + wq = fs_info->endio_meta_workers; + func = btrfs_endio_meta_helper; + } else { + wq = fs_info->endio_workers; + func = btrfs_endio_helper; + } } + + btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); + btrfs_queue_work(wq, &end_io_wq->work); } /* @@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_start = submit_bio_start; async->submit_bio_done = submit_bio_done; - btrfs_init_work(&async->work, run_one_async_start, + btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, run_one_async_done, run_one_async_free); async->bio_flags = bio_flags; @@ -3450,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); - btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_total_bytes(dev_item, + dev->disk_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 102ed3143976..3efe1c3877bf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; atomic_set(&caching_ctl->count, 1); - btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); + btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, + caching_thread, NULL, NULL); spin_lock(&cache->lock); /* @@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->sync = 0; init_completion(&async->wait); - btrfs_init_work(&async->work, delayed_ref_async_start, - NULL, NULL); + btrfs_init_work(&async->work, btrfs_extent_refs_helper, + delayed_ref_async_start, NULL, NULL); btrfs_queue_work(root->fs_info->extent_workers, &async->work); @@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) */ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - u64 num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + u64 num_devices = root->fs_info->fs_devices->rw_devices; u64 target; u64 tmp; @@ -8440,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (stripped) return extended_to_chunk(stripped); - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + num_devices = root->fs_info->fs_devices->rw_devices; stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e11aab9f391..af0359dcf337 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + offset += len; continue; } } @@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -ENOMEM; path->leave_spinning = 1; - start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); - len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); + start = round_down(start, BTRFS_I(inode)->root->sectorsize); + len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start; /* * lookup the last file extent. We're not using i_size here diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3afac292d67..36861b7a6757 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *inode, struct file *filp) { if (filp->private_data) btrfs_ioctl_trans_end(filp); - filemap_flush(inode->i_mapping); + /* + * ordered_data_close is set by settattr when we are about to truncate + * a file from a non-zero size to a zero size. This tries to + * flush down new bytes that may have been written if the + * application were using truncate to replace a file in place. + */ + if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags)) + filemap_flush(inode->i_mapping); return 0; } @@ -2088,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; - path->slots[0]++; key.offset = offset; btrfs_set_item_key_safe(root, path, &key); fi = btrfs_item_ptr(leaf, path->slots[0], @@ -2216,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } - lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); + lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); lockend = round_down(offset + len, BTRFS_I(inode)->root->sectorsize) - 1; same_page = ((offset >> PAGE_CACHE_SHIFT) == @@ -2277,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) tail_start + tail_len, 0, 1); if (ret) goto out_only_mutex; - } + } } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03708ef3deef..9c194bd74d6e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - btrfs_init_work(&async_cow->work, async_cow_start, - async_cow_submit, async_cow_free); + btrfs_init_work(&async_cow->work, + btrfs_delalloc_helper, + async_cow_start, async_cow_submit, + async_cow_free); nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; @@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_fixup_helper, + btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *workers; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); + if (btrfs_is_free_space_inode(inode)) { + wq = root->fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = root->fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } - if (btrfs_is_free_space_inode(inode)) - workers = root->fs_info->endio_freespace_worker; - else - workers = root->fs_info->endio_write_workers; - btrfs_queue_work(workers, &ordered_extent->work); + btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, + NULL); + btrfs_queue_work(wq, &ordered_extent->work); return 0; } @@ -4674,6 +4682,11 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_bit(EXTENT_FLAG_LOGGING, &em->flags); remove_extent_mapping(map_tree, em); free_extent_map(em); + if (need_resched()) { + write_unlock(&map_tree->lock); + cond_resched(); + write_lock(&map_tree->lock); + } } write_unlock(&map_tree->lock); @@ -4696,6 +4709,7 @@ static void evict_inode_truncate_pages(struct inode *inode) &cached_state, GFP_NOFS); free_extent_state(state); + cond_resched(); spin_lock(&io_tree->lock); } spin_unlock(&io_tree->lock); @@ -5181,6 +5195,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) iput(inode); inode = ERR_PTR(ret); } + /* + * If orphan cleanup did remove any orphans, it means the tree + * was modified and therefore the commit root is not the same as + * the current root anymore. This is a problem, because send + * uses the commit root and therefore can see inode items that + * don't exist in the current root anymore, and for example make + * calls to btrfs_iget, which will do tree lookups based on the + * current root and not on the commit root. Those lookups will + * fail, returning a -ESTALE error, and making send fail with + * that error. So make sure a send does not see any orphans we + * have just removed, and that it will see the same inodes + * regardless of whether a transaction commit happened before + * it started (meaning that the commit root will be the same as + * the current root) or not. + */ + if (sub_root->node != sub_root->commit_root) { + u64 sub_flags = btrfs_root_flags(&sub_root->root_item); + + if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { + struct extent_buffer *eb; + + /* + * Assert we can't have races between dentry + * lookup called through the snapshot creation + * ioctl and the VFS. + */ + ASSERT(mutex_is_locked(&dir->i_mutex)); + + down_write(&root->fs_info->commit_root_sem); + eb = sub_root->commit_root; + sub_root->commit_root = + btrfs_root_node(sub_root); + up_write(&root->fs_info->commit_root_sem); + free_extent_buffer(eb); + } + } } return inode; @@ -5606,6 +5656,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* + * O_TMPFILE, set link count to 0, so that after this point, + * we fill in an inode item with the correct link count. + */ + if (!name) + set_nlink(inode, 0); + + /* * we have to initialize this early, so we can reclaim the inode * number if we fail afterwards in this function. */ @@ -6097,14 +6154,14 @@ out_fail: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, - u64 map_start, u64 map_len) + u64 map_start) { u64 start_diff; BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); start_diff = map_start - em->start; em->start = map_start; - em->len = map_len; + em->len = existing->start - em->start; if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; @@ -6275,6 +6332,8 @@ next: goto not_found; if (start + len <= found_key.offset) goto not_found; + if (start > found_key.offset) + goto next; em->start = start; em->orig_start = start; em->len = found_key.offset - start; @@ -6390,8 +6449,7 @@ insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em, start, - root->sectorsize); + em, start); free_extent_map(existing); if (err) { free_extent_map(em); @@ -7158,7 +7216,8 @@ again: if (!ret) goto out_test; - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_init_work(&ordered->work, btrfs_endio_write_helper, + finish_ordered_fn, NULL, NULL); btrfs_queue_work(root->fs_info->endio_write_workers, &ordered->work); out_test: @@ -7306,10 +7365,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, &map_length, NULL, 0); - if (ret) { - bio_put(orig_bio); + if (ret) return -EIO; - } if (map_length >= orig_bio->bi_iter.bi_size) { bio = orig_bio; @@ -7326,6 +7383,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) return -ENOMEM; + bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; atomic_inc(&dip->pending_bios); @@ -7534,7 +7592,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, count = iov_iter_count(iter); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) - filemap_fdatawrite_range(inode->i_mapping, offset, count); + filemap_fdatawrite_range(inode->i_mapping, offset, + offset + count - 1); if (rw & WRITE) { /* @@ -8495,7 +8554,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); + WARN_ON_ONCE(!inode); + btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, + btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -8979,6 +9040,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (ret) goto out; + /* + * We set number of links to 0 in btrfs_new_inode(), and here we set + * it to 1 because d_tmpfile() will issue a warning if the count is 0, + * through: + * + * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() + */ + set_nlink(inode, 1); d_tmpfile(dentry, inode); mark_inode_dirty(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47aceb494d1d..fce6fd0e3f50 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto fail; - ret = btrfs_orphan_cleanup(pending_snapshot->snap); - if (ret) - goto fail; - - /* - * If orphan cleanup did remove any orphans, it means the tree was - * modified and therefore the commit root is not the same as the - * current root anymore. This is a problem, because send uses the - * commit root and therefore can see inode items that don't exist - * in the current root anymore, and for example make calls to - * btrfs_iget, which will do tree lookups based on the current root - * and not on the commit root. Those lookups will fail, returning a - * -ESTALE error, and making send fail with that error. So make sure - * a send does not see any orphans we have just removed, and that it - * will see the same inodes regardless of whether a transaction - * commit happened before it started (meaning that the commit root - * will be the same as the current root) or not. - */ - if (readonly && pending_snapshot->snap->node != - pending_snapshot->snap->commit_root) { - trans = btrfs_join_transaction(pending_snapshot->snap); - if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { - ret = PTR_ERR(trans); - goto fail; - } - if (!IS_ERR(trans)) { - ret = btrfs_commit_transaction(trans, - pending_snapshot->snap); - if (ret) - goto fail; - } - } - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); @@ -3527,7 +3494,8 @@ process_slot: btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - last_dest_end = new_key.offset + datal; + last_dest_end = ALIGN(new_key.offset + datal, + root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, destoff, olen); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 963895c1f801..ac734ec4cc20 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, + btrfs_flush_delalloc_helper, btrfs_run_ordered_extent_work, NULL, NULL); list_add_tail(&ordered->work_list, &works); btrfs_queue_work(root->fs_info->flush_workers, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b497498484be..ded5c601d916 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1973,7 +1973,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, elem.seq, &roots); btrfs_put_tree_mod_seq(fs_info, &elem); if (ret < 0) - return ret; + goto out; if (roots->nnodes != 1) goto out; @@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, + btrfs_qgroup_rescan_helper, btrfs_qgroup_rescan_worker, NULL, NULL); if (ret) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4a88f073fdd7..0a6b6e4bcbb9 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1416,7 +1416,8 @@ cleanup: static void async_rmw_stripe(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + rmw_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio) static void async_read_rebuild(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + read_rebuild_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) plug = container_of(cb, struct btrfs_plug_cb, cb); if (from_schedule) { - btrfs_init_work(&plug->work, unplug_work, NULL, NULL); + btrfs_init_work(&plug->work, btrfs_rmw_helper, + unplug_work, NULL, NULL); btrfs_queue_work(plug->info->rmw_workers, &plug->work); return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 09230cf3a244..20408c6b665a 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); + btrfs_init_work(&rmw->work, btrfs_readahead_helper, + reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b6d198f5181e..f4a41f37be22 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, - NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrub_helper, + scrub_bio_end_io_worker, NULL, NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -999,8 +999,8 @@ nodatasum_case: fixup_nodatasum->root = fs_info->extent_root; fixup_nodatasum->mirror_num = failed_mirror_index + 1; scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, - NULL, NULL); + btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, + scrub_fixup_nodatasum, NULL, NULL); btrfs_queue_work(fs_info->scrub_workers, &fixup_nodatasum->work); goto out; @@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, + scrub_wr_bio_end_io_worker, NULL, NULL); btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } @@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; + struct rcu_string *name; if (btrfs_fs_closing(fs_info)) return -EINVAL; @@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -ENODEV; } + if (!is_dev_replace && !readonly && !dev->writeable) { + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + rcu_read_lock(); + name = rcu_dereference(dev->name); + btrfs_err(fs_info, "scrub: device %s is not writable", + name->str); + rcu_read_unlock(); + return -EROFS; + } + mutex_lock(&fs_info->scrub_lock); if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); @@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, nocow_ctx->len = len; nocow_ctx->mirror_num = mirror_num; nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); + btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, + copy_nocow_pages_worker, NULL, NULL); INIT_LIST_HEAD(&nocow_ctx->inodes); btrfs_queue_work(fs_info->scrub_nocow_workers, &nocow_ctx->work); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 78699364f537..12e53556e214 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, if (!fs_info->device_dir_kobj) return -EINVAL; - if (one_device) { + if (one_device && one_device->bdev) { disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9e1f2cd5e67a..7e0e6e3029dd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3298,7 +3298,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct list_head ordered_sums; int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; - bool need_find_last_extent = (*last_extent == 0); + bool need_find_last_extent = true; bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3352,8 +3352,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, */ if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { has_extents = true; - if (need_find_last_extent && - first_key.objectid == (u64)-1) + if (first_key.objectid == (u64)-1) first_key = ins_keys[i]; } else { need_find_last_extent = false; @@ -3427,6 +3426,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!has_extents) return ret; + if (need_find_last_extent && *last_extent == first_key.offset) { + /* + * We don't have any leafs between our current one and the one + * we processed before that can have file extent items for our + * inode (and have a generation number smaller than our current + * transaction id). + */ + need_find_last_extent = false; + } + /* * Because we use btrfs_search_forward we could skip leaves that were * not modified and then assume *last_extent is valid when it really @@ -3537,7 +3546,7 @@ fill_holes: 0, 0); if (ret) break; - *last_extent = offset + len; + *last_extent = extent_end; } /* * Need to let the callers know we dropped the path so they should diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cb82f62cb7c..340a92d08e84 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; } else if (!device->name || strcmp(device->name->str, path)) { + /* + * When FS is already mounted. + * 1. If you are here and if the device->name is NULL that + * means this device was missing at time of FS mount. + * 2. If you are here and if the device->name is different + * from 'path' that means either + * a. The same device disappeared and reappeared with + * different name. or + * b. The missing-disk-which-was-replaced, has + * reappeared now. + * + * We must allow 1 and 2a above. But 2b would be a spurious + * and unintentional. + * + * Further in case of 1 and 2a above, the disk at 'path' + * would have missed some transaction when it was away and + * in case of 2a the stale bdev has to be updated as well. + * 2b must not be allowed at all time. + */ + + /* + * As of now don't allow update to btrfs_fs_device through + * the btrfs dev scan cli, after FS has been mounted. + */ + if (fs_devices->opened) { + return -EBUSY; + } else { + /* + * That is if the FS is _not_ mounted and if you + * are here, that means there is more than one + * disk with same uuid and devid.We keep the one + * with larger generation number or the last-in if + * generation are equal. + */ + if (found_transid < device->generation) + return -EEXIST; + } + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM; @@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path, } } + /* + * Unmount does not free the btrfs_device struct but would zero + * generation along with most of the other members. So just update + * it back. We need it to pick the disk with largest generation + * (as above). + */ + if (!fs_devices->opened) + device->generation = found_transid; + if (found_transid > fs_devices->latest_trans) { fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; @@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_set_device_group(leaf, dev_item, 0); btrfs_set_device_seek_speed(leaf, dev_item, 0); @@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->total_devices--; if (device->missing) - root->fs_info->fs_devices->missing_devices--; + device->fs_devices->missing_devices--; next_device = list_entry(root->fs_info->fs_devices->devices.next, struct btrfs_device, dev_list); @@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->bdev) { fs_info->fs_devices->open_devices--; - /* zero out the old super */ - btrfs_scratch_superblock(srcdev); + /* + * zero out the old super if it is not writable + * (e.g. seed device) + */ + if (srcdev->writeable) + btrfs_scratch_superblock(srcdev); } call_rcu(&srcdev->rcu, free_device); @@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; + fs_devices->missing_devices = 0; + fs_devices->num_can_discard = 0; + fs_devices->rotating = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); + btrfs_init_work(&dev->work, btrfs_submit_helper, + pending_bios_fn, NULL, NULL); return dev; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ac4f260155c8..889b98455750 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) +{ + struct super_block *sb = file->f_path.dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct TCP_Server_Info *server = tcon->ses->server; + + if (server->ops->fallocate) + return server->ops->fallocate(file, tcon, mode, off, len); + + return -EOPNOTSUPP; +} + static int cifs_permission(struct inode *inode, int mask) { struct cifs_sb_info *cifs_sb; @@ -812,8 +825,9 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) if (!(S_ISREG(inode->i_mode))) return -EINVAL; - /* check if file is oplocked */ - if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || + /* Check if file is oplocked if this is request for new lease */ + if (arg == F_UNLCK || + ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) return generic_setlease(file, arg, lease); else if (tlink_tcon(cfile->tlink)->local_lease && @@ -908,6 +922,7 @@ const struct file_operations cifs_file_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_strict_ops = { @@ -927,6 +942,7 @@ const struct file_operations cifs_file_strict_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_ops = { @@ -947,6 +963,7 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_nobrl_ops = { @@ -965,6 +982,7 @@ const struct file_operations cifs_file_nobrl_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_strict_nobrl_ops = { @@ -983,6 +1001,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_nobrl_ops = { @@ -1002,6 +1021,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_dir_ops = { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0012e1e291d4..dfc731b02aa9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -409,6 +409,10 @@ struct smb_version_operations { /* get mtu credits */ int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, unsigned int *, unsigned int *); + /* check if we need to issue closedir */ + bool (*dir_needs_close)(struct cifsFileInfo *); + long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, + loff_t); }; struct smb_version_values { @@ -883,6 +887,7 @@ struct cifs_tcon { for this mount even if server would support */ bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ + bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 33df36ef9d52..5f9822ac0245 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2253,6 +2253,29 @@ typedef struct { /* minimum includes first three fields, and empty FS Name */ #define MIN_FS_ATTR_INFO_SIZE 12 + +/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */ +#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000 +#define FILE_SUPPORTS_USN_JOURNAL 0x02000000 +#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000 +#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000 +#define FILE_SUPPORTS_HARD_LINKS 0x00400000 +#define FILE_SUPPORTS_TRANSACTIONS 0x00200000 +#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000 +#define FILE_READ_ONLY_VOLUME 0x00080000 +#define FILE_NAMED_STREAMS 0x00040000 +#define FILE_SUPPORTS_ENCRYPTION 0x00020000 +#define FILE_SUPPORTS_OBJECT_IDS 0x00010000 +#define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 +#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 +#define FILE_SUPPORTS_SPARSE_FILES 0x00000040 +#define FILE_VOLUME_QUOTAS 0x00000020 +#define FILE_FILE_COMPRESSION 0x00000010 +#define FILE_PERSISTENT_ACLS 0x00000008 +#define FILE_UNICODE_ON_DISK 0x00000004 +#define FILE_CASE_PRESERVED_NAMES 0x00000002 +#define FILE_CASE_SENSITIVE_SEARCH 0x00000001 typedef struct { __le32 Attributes; __le32 MaxPathNameComponentLength; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4ab2f79ffa7a..d5fec92e0360 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -762,7 +762,7 @@ int cifs_closedir(struct inode *inode, struct file *file) cifs_dbg(FYI, "Freeing private data in close dir\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); if (server->ops->close_dir) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 426d6c6ad8bf..949ec909ec9a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1727,6 +1727,12 @@ unlink_target: target_dentry, to_name); } + /* force revalidate to go get info when needed */ + CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; + + source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = + target_dir->i_mtime = current_fs_time(source_dir->i_sb); + cifs_rename_exit: kfree(info_buf_source); kfree(from_name); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 81340c6253eb..b7415d596dbd 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) cinode->oplock = 0; } -static int -cifs_oplock_break_wait(void *unused) -{ - schedule(); - return signal_pending(current) ? -ERESTARTSYS : 0; -} - /* * We wait for oplock breaks to be processed before we attempt to perform * writes. diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b15862e0f68c..798c80a41c88 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -593,7 +593,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* close and restart search */ cifs_dbg(FYI, "search backing up - close and restart search\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); if (server->ops->close) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5e8c22d6c7b9..1a6df4b03f67 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode) return CIFS_SB(inode->i_sb)->wsize; } +static bool +cifs_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = { .create_mf_symlink = cifs_create_mf_symlink, .is_read_op = cifs_is_read_op, .wp_retry_size = cifs_wp_retry_size, + .dir_needs_close = cifs_dir_needs_close, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e31a9dfdcd39..af59d03db492 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, - {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, + {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, @@ -298,7 +298,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, - {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"}, + {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"}, {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index f2e6ac29a8d6..4aa7a0f07d6e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length) /* Windows 7 server returns 24 bytes more */ if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) return 0; - /* server can return one byte more */ + /* server can return one byte more due to implied bcc[0] */ if (clc_len == 4 + len + 1) return 0; + + /* + * MacOS server pads after SMB2.1 write response with 3 bytes + * of junk. Other servers match RFC1001 len to actual + * SMB2/SMB3 frame length (header + smb2 response specific data) + * Log the server error (once), but allow it and continue + * since the frame is parseable. + */ + if (clc_len < 4 /* RFC1001 header size */ + len) { + printk_once(KERN_WARNING + "SMB2 server sent bad RFC1001 len %d not %d\n", + len, clc_len - 4); + return 0; + } + return 1; } return 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 77f8aeb9c2fc..5a48aa290dfe 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, return SMB2_write(xid, parms, written, iov, nr_segs); } +/* Set or clear the SPARSE_FILE attribute based on value passed in setsparse */ +static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse) +{ + struct cifsInodeInfo *cifsi; + int rc; + + cifsi = CIFS_I(inode); + + /* if file already sparse don't bother setting sparse again */ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && setsparse) + return true; /* already sparse */ + + if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && !setsparse) + return true; /* already not sparse */ + + /* + * Can't check for sparse support on share the usual way via the + * FS attribute info (FILE_SUPPORTS_SPARSE_FILES) on the share + * since Samba server doesn't set the flag on the share, yet + * supports the set sparse FSCTL and returns sparse correctly + * in the file attributes. If we fail setting sparse though we + * mark that server does not support sparse files for this share + * to avoid repeatedly sending the unsupported fsctl to server + * if the file is repeatedly extended. + */ + if (tcon->broken_sparse_sup) + return false; + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_SPARSE, + true /* is_fctl */, &setsparse, 1, NULL, NULL); + if (rc) { + tcon->broken_sparse_sup = true; + cifs_dbg(FYI, "set sparse rc = %d\n", rc); + return false; + } + + if (setsparse) + cifsi->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE; + else + cifsi->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE); + + return true; +} + static int smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, __u64 size, bool set_alloc) { __le64 eof = cpu_to_le64(size); + struct inode *inode; + + /* + * If extending file more than one page make sparse. Many Linux fs + * make files sparse by default when extending via ftruncate + */ + inode = cfile->dentry->d_inode; + + if (!set_alloc && (size > inode->i_size + 8192)) { + __u8 set_sparse = 1; + + /* whether set sparse succeeds or not, extend the file */ + smb2_set_sparse(xid, tcon, cfile, inode, set_sparse); + } + return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof, false); } @@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, bool keep_size) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; + unsigned int xid; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) + if (keep_size == false) + return -EOPNOTSUPP; + + /* + * Must check if file sparse since fallocate -z (zero range) assumes + * non-sparse allocation + */ + if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) + return -EOPNOTSUPP; + + /* + * need to make sure we are not asked to extend the file since the SMB3 + * fsctl does not change the file size. In the future we could change + * this to zero the first part of the range then set the file size + * which for a non sparse file would zero the newly extended range + */ + if (keep_size == false) + if (i_size_read(inode) < offset + len) + return -EOPNOTSUPP; + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), NULL, NULL); + free_xid(xid); + return rc; +} + +static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; + unsigned int xid; + __u8 set_sparse = 1; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* Need to make file sparse, if not already, before freeing range. */ + /* Consider adding equivalent for compressed since it could also work */ + if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) + return -EOPNOTSUPP; + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), NULL, NULL); + free_xid(xid); + return rc; +} + +static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, + loff_t off, loff_t len) +{ + /* KEEP_SIZE already checked for by do_fallocate */ + if (mode & FALLOC_FL_PUNCH_HOLE) + return smb3_punch_hole(file, tcon, off, len); + else if (mode & FALLOC_FL_ZERO_RANGE) { + if (mode & FALLOC_FL_KEEP_SIZE) + return smb3_zero_range(file, tcon, off, len, true); + return smb3_zero_range(file, tcon, off, len, false); + } + + return -EOPNOTSUPP; +} + static void smb2_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, bool set_level2) @@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode) SMB2_MAX_BUFFER_SIZE); } +static bool +smb2_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->invalidHandle; +} + struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = { .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb21_operations = { @@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = { .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb30_operations = { @@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = { .clone_range = smb2_clone_range, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, + .fallocate = smb3_fallocate, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 42ebc1a8be6c..fa0dd044213b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -907,7 +907,8 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - tcon->bad_network_name = true; + if (tcon) + tcon->bad_network_name = true; } goto tcon_exit; } @@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, cifs_dbg(FYI, "SMB2 IOCTL\n"); - *out_data = NULL; + if (out_data != NULL) + *out_data = NULL; + /* zero out returned data len, in case of error */ if (plen) *plen = 0; @@ -2177,6 +2180,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; if (rc) { + if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) { + srch_inf->endOfSearch = true; + rc = 0; + } cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } @@ -2214,11 +2221,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, else cifs_dbg(VFS, "illegal search buffer type\n"); - if (rsp->hdr.Status == STATUS_NO_MORE_FILES) - srch_inf->endOfSearch = 1; - else - srch_inf->endOfSearch = 0; - return rc; qdir_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 69f3595d3952..fbe486c285a9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -573,6 +573,12 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; +/* this goes in the ioctl buffer when doing FSCTL_SET_ZERO_DATA */ +struct file_zero_data_information { + __le64 FileOffset; + __le64 BeyondFinalZero; +} __packed; + struct copychunk_ioctl_rsp { __le32 ChunksWritten; __le32 ChunkBytesWritten; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 0e538b5c9622..83efa59535be 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -63,7 +63,7 @@ #define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */ #define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */ #define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */ -#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */ +#define FSCTL_SET_ZERO_DATA 0x000980C8 #define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */ #define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */ #define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..622e88249024 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) */ overhead += ngroups * (2 + sbi->s_itb_per_group); - /* Add the journal blocks as well */ - overhead += sbi->s_journal->j_maxlen; + /* Add the internal journal blocks as well */ + if (sbi->s_journal && !sbi->journal_bdev) + overhead += sbi->s_journal->j_maxlen; sbi->s_overhead_last = overhead; smp_wmb(); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4556ce1af5b0..5ddaf8625d3b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static int isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *, int relocated); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -1259,7 +1259,7 @@ out_toomany: goto out; } -static int isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode, int relocated) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode) */ if (!high_sierra) { - parse_rock_ridge_inode(de, inode); + parse_rock_ridge_inode(de, inode, relocated); /* if we want uid/gid set, override the rock ridge setting */ if (sbi->s_uid_set) inode->i_uid = sbi->s_uid; @@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data) * offset that point to the underlying meta-data for the inode. The * code below is otherwise similar to the iget() code in * include/linux/fs.h */ -struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset) +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated) { unsigned long hashval; struct inode *inode; @@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - ret = isofs_read_inode(inode); + ret = isofs_read_inode(inode, relocated); if (ret < 0) { iget_failed(inode); inode = ERR_PTR(ret); diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 99167238518d..0ac4c1f73fbd 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -107,7 +107,7 @@ extern int iso_date(char *, int); struct inode; /* To make gcc happy */ -extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); +extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated); extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); @@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); -extern struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset); +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated); + +static inline struct inode *isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 0); +} + +static inline struct inode *isofs_iget_reloc(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 1); +} /* Because the inode number is no longer relevant to finding the * underlying meta-data for an inode, we are free to choose a more diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index c0bf42472e40..f488bbae541a 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -288,12 +288,16 @@ eio: goto out; } +#define RR_REGARD_XA 1 +#define RR_RELOC_DE 2 + static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, - struct inode *inode, int regard_xa) + struct inode *inode, int flags) { int symlink_len = 0; int cnt, sig; + unsigned int reloc_block; struct inode *reloc; struct rock_ridge *rr; int rootflag; @@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); - if (regard_xa) { + if (flags & RR_REGARD_XA) { rs.chr += 14; rs.len -= 14; if (rs.len < 0) @@ -485,12 +489,22 @@ repeat: "relocated directory\n"); goto out; case SIG('C', 'L'): - ISOFS_I(inode)->i_first_extent = - isonum_733(rr->u.CL.location); - reloc = - isofs_iget(inode->i_sb, - ISOFS_I(inode)->i_first_extent, - 0); + if (flags & RR_RELOC_DE) { + printk(KERN_ERR + "ISOFS: Recursive directory relocation " + "is not supported\n"); + goto eio; + } + reloc_block = isonum_733(rr->u.CL.location); + if (reloc_block == ISOFS_I(inode)->i_iget5_block && + ISOFS_I(inode)->i_iget5_offset == 0) { + printk(KERN_ERR + "ISOFS: Directory relocation points to " + "itself\n"); + goto eio; + } + ISOFS_I(inode)->i_first_extent = reloc_block; + reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; @@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, + int relocated) { - int result = parse_rock_ridge_inode_internal(de, inode, 0); + int flags = relocated ? RR_RELOC_DE : 0; + int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes @@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { - result = parse_rock_ridge_inode_internal(de, inode, 14); + result = parse_rock_ridge_inode_internal(de, inode, + flags | RR_REGARD_XA); } return result; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ba491926df5f..be7cbce6e4c7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) if (atomic_read(&c->io_count) == 0) break; ret = nfs_wait_bit_killable(&q.key); - } while (atomic_read(&c->io_count) != 0); + } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; } @@ -139,26 +139,49 @@ nfs_iocounter_wait(struct nfs_io_counter *c) /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked + * @nonblock - if true don't block waiting for lock * * this lock must be held if modifying the page group list * - * returns result from wait_on_bit_lock: 0 on success, < 0 on error + * return 0 on success, < 0 on error: -EDELAY if nonblocking or the + * result from wait_on_bit_lock + * + * NOTE: calling with nonblock=false should always have set the + * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock + * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. */ int -nfs_page_group_lock(struct nfs_page *req, bool wait) +nfs_page_group_lock(struct nfs_page *req, bool nonblock) { struct nfs_page *head = req->wb_head; - int ret; WARN_ON_ONCE(head != head->wb_head); - do { - ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); - } while (wait && ret != 0); + if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) + return 0; - WARN_ON_ONCE(ret > 0); - return ret; + if (!nonblock) + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); + + return -EAGAIN; +} + +/* + * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it + * @req - a request in the group + * + * This is a blocking call to wait for the group lock to be cleared. + */ +void +nfs_page_group_lock_wait(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + wait_on_bit(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); } /* @@ -219,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req, true); + nfs_page_group_lock(req, false); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -701,10 +724,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { struct nfs_page *req; - struct page **pages; + struct page **pages, + *last_page; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; - unsigned int pagecount; + unsigned int pagecount, pageused; pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); if (!nfs_pgarray_set(&hdr->page_array, pagecount)) @@ -712,12 +736,23 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = hdr->page_array.pagevec; + last_page = NULL; + pageused = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; + + if (WARN_ON_ONCE(pageused >= pagecount)) + return nfs_pgio_error(desc, hdr); + + if (!last_page || last_page != req->wb_page) { + *pages++ = last_page = req->wb_page; + pageused++; + } } + if (WARN_ON_ONCE(pageused != pagecount)) + return nfs_pgio_error(desc, hdr); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) @@ -788,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; + if (req->wb_page == prev->wb_page) { + if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) + return false; + } else { + if (req->wb_pgbase != 0 || + prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return false; + } } size = pgio->pg_ops->pg_test(pgio, prev, req); WARN_ON_ONCE(size > req->wb_bytes); @@ -858,13 +901,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; - int ret; - ret = nfs_page_group_lock(req, false); - if (ret < 0) { - desc->pg_error = ret; - return 0; - } + nfs_page_group_lock(req, false); subreq = req; bytes_left = subreq->wb_bytes; @@ -886,11 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ - ret = nfs_page_group_lock(req, false); - if (ret < 0) { - desc->pg_error = ret; - return 0; - } + nfs_page_group_lock(req, false); continue; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e3b5cf28bdc5..175d5d073ccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -241,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req, true); + nfs_page_group_lock(req, false); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -478,10 +478,23 @@ try_again: return NULL; } - /* lock each request in the page group */ - ret = nfs_page_group_lock(head, false); - if (ret < 0) + /* holding inode lock, so always make a non-blocking call to try the + * page group lock */ + ret = nfs_page_group_lock(head, true); + if (ret < 0) { + spin_unlock(&inode->i_lock); + + if (!nonblock && ret == -EAGAIN) { + nfs_page_group_lock_wait(head); + nfs_release_request(head); + goto try_again; + } + + nfs_release_request(head); return ERR_PTR(ret); + } + + /* lock each request in the page group */ subreq = head; do { /* diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9737cba1357d..83a06001742b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1014,7 +1014,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (!fi) - goto out_no_entry; + goto out_fail; cfi.icb.extLength = cpu_to_le32(sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); if (UDF_SB(inode->i_sb)->s_lvid_bh) { @@ -1036,6 +1036,7 @@ out: out_no_entry: up_write(&iinfo->i_data_sem); +out_fail: inode_dec_link_count(inode); iput(inode); goto out; |