From 71795ee590111e3636cc3c148289dfa9fa0a5fc3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 29 Apr 2021 10:51:34 -0400 Subject: btrfs: avoid RCU stalls while running delayed iputs Generally a delayed iput is added when we might do the final iput, so usually we'll end up sleeping while processing the delayed iputs naturally. However there's no guarantee of this, especially for small files. In production we noticed 5 instances of RCU stalls while testing a kernel release overnight across 1000 machines, so this is relatively common: host count: 5 rcu: INFO: rcu_sched self-detected stall on CPU rcu: ....: (20998 ticks this GP) idle=59e/1/0x4000000000000002 softirq=12333372/12333372 fqs=3208 (t=21031 jiffies g=27810193 q=41075) NMI backtrace for cpu 1 CPU: 1 PID: 1713 Comm: btrfs-cleaner Kdump: loaded Not tainted 5.6.13-0_fbk12_rc1_5520_gec92bffc1ec9 #1 Call Trace: dump_stack+0x50/0x70 nmi_cpu_backtrace.cold.6+0x30/0x65 ? lapic_can_unplug_cpu.cold.30+0x40/0x40 nmi_trigger_cpumask_backtrace+0xba/0xca rcu_dump_cpu_stacks+0x99/0xc7 rcu_sched_clock_irq.cold.90+0x1b2/0x3a3 ? trigger_load_balance+0x5c/0x200 ? tick_sched_do_timer+0x60/0x60 ? tick_sched_do_timer+0x60/0x60 update_process_times+0x24/0x50 tick_sched_timer+0x37/0x70 __hrtimer_run_queues+0xfe/0x270 hrtimer_interrupt+0xf4/0x210 smp_apic_timer_interrupt+0x5e/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0010:queued_spin_lock_slowpath+0x17d/0x1b0 RSP: 0018:ffffc9000da5fe48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000000 RBX: ffff889fa81d0cd8 RCX: 0000000000000029 RDX: ffff889fff86c0c0 RSI: 0000000000080000 RDI: ffff88bfc2da7200 RBP: ffff888f2dcdd768 R08: 0000000001040000 R09: 0000000000000000 R10: 0000000000000001 R11: ffffffff82a55560 R12: ffff88bfc2da7200 R13: 0000000000000000 R14: ffff88bff6c2a360 R15: ffffffff814bd870 ? kzalloc.constprop.57+0x30/0x30 list_lru_add+0x5a/0x100 inode_lru_list_add+0x20/0x40 iput+0x1c1/0x1f0 run_delayed_iput_locked+0x46/0x90 btrfs_run_delayed_iputs+0x3f/0x60 cleaner_kthread+0xf2/0x120 kthread+0x10b/0x130 Fix this by adding a cond_resched_lock() to the loop processing delayed iputs so we can avoid these sort of stalls. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Rik van Riel Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 69fcdf8f0b1c..095e452f59f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3246,6 +3246,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); + cond_resched_lock(&fs_info->delayed_iput_lock); } spin_unlock(&fs_info->delayed_iput_lock); } -- cgit From e380adfc213a13677993c0e35cb48f5a8e61ebb0 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 19 May 2021 00:40:27 +0900 Subject: btrfs: zoned: pass start block to btrfs_use_zone_append btrfs_use_zone_append only needs the passed in extent_map's block_start member, so there's no need to pass in the full extent map. This also enables the use of btrfs_use_zone_append in places where we only have a start byte but no extent_map. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/zoned.c | 4 ++-- fs/btrfs/zoned.h | 5 ++--- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 360d997c7226..d9f20ca3ac7d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3762,7 +3762,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, /* Note that em_end from extent_map_end() is exclusive */ iosize = min(em_end, end + 1) - cur; - if (btrfs_use_zone_append(inode, em)) + if (btrfs_use_zone_append(inode, em->block_start)) opf = REQ_OP_ZONE_APPEND; free_extent_map(em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 095e452f59f0..bb4ab408d670 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7796,7 +7796,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, iomap->bdev = fs_info->fs_devices->latest_bdev; iomap->length = len; - if (write && btrfs_use_zone_append(BTRFS_I(inode), em)) + if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start)) iomap->flags |= IOMAP_F_ZONE_APPEND; free_extent_map(em); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 304ce64c70a4..1bb8ee97aae0 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1278,7 +1278,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans) spin_unlock(&trans->releasing_ebs_lock); } -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_group *cache; @@ -1293,7 +1293,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) if (!is_data_inode(&inode->vfs_inode)) return false; - cache = btrfs_lookup_block_group(fs_info, em->block_start); + cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) return false; diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 5e41a74a9cb2..e55d32595c2c 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache); void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb); void btrfs_free_redirty_list(struct btrfs_transaction *trans); -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em); +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start); void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, struct bio *bio); void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered); @@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb) { } static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { } -static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, - struct extent_map *em) +static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { return false; } -- cgit