author     Jakub Kicinski <kuba@kernel.org>    2024-07-12 22:17:55 -0700
committer  Jakub Kicinski <kuba@kernel.org>    2024-07-12 22:20:30 -0700
commit     e5abd12f3df13e92e2fb3c02fe825aa6c57f8306
tree       b61be7584dff0f15524cb1b97abf6efe1f626885 /fs
parent     75fd6079326da816caba51f9a4192c1876f37423
parent     528dd46d0fc35c0176257a13a27d41e44fcc6cb3
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:

drivers/net/ethernet/broadcom/bnxt/bnxt.c
  f7ce5eb2cb79 ("bnxt_en: Fix crash in bnxt_get_max_rss_ctx_ring()")
  20c8ad72eb7f ("eth: bnxt: use the RSS context XArray instead of the local list")

Adjacent changes:

net/ethtool/ioctl.c
  503757c80928 ("net: ethtool: Fix RSS setting")
  eac9122f0c41 ("net: ethtool: record custom RSS contexts in the XArray")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'fs')
 fs/bcachefs/btree_gc.c      |  30
 fs/bcachefs/btree_iter.c    |   7
 fs/bcachefs/btree_locking.c |  10
 fs/bcachefs/btree_locking.h |  22
 fs/bcachefs/btree_types.h   |   1
 fs/bcachefs/buckets.c       |   2
 fs/bcachefs/buckets.h       |   8
 fs/bcachefs/fs.c            |   3
 fs/bcachefs/io_misc.c       |   2
 fs/bcachefs/util.c          |  25
 fs/bcachefs/util.h          |   1
 fs/btrfs/disk-io.c          |   2
 fs/btrfs/extent_map.c       | 123
 fs/btrfs/fs.h               |   1

14 files changed, 184 insertions(+), 53 deletions(-)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 7c72a9e6f511..a0deb8266011 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -641,16 +641,30 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
 		target_depth = 0;
 
 	/* root */
-	mutex_lock(&c->btree_root_lock);
-	struct btree *b = bch2_btree_id_root(c, btree)->b;
-	if (!btree_node_fake(b)) {
+	do {
+retry_root:
+		bch2_trans_begin(trans);
+
+		struct btree_iter iter;
+		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
+					  0, bch2_btree_id_root(c, btree)->b->c.level, 0);
+		struct btree *b = bch2_btree_iter_peek_node(&iter);
+		ret = PTR_ERR_OR_ZERO(b);
+		if (ret)
+			goto err_root;
+
+		if (b != btree_node_root(c, b)) {
+			bch2_trans_iter_exit(trans, &iter);
+			goto retry_root;
+		}
+
 		gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX));
-		ret = lockrestart_do(trans,
-			bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
-					 NULL, NULL, bkey_i_to_s_c(&b->key), initial));
+		struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+		ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial);
 		level = b->c.level;
-	}
-	mutex_unlock(&c->btree_root_lock);
+err_root:
+		bch2_trans_iter_exit(trans, &iter);
+	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 
 	if (ret)
 		return ret;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 0ed9e6574fcd..19352a08ea20 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -996,7 +996,7 @@ retry_all:
 
 	bch2_trans_unlock(trans);
 	cond_resched();
-	trans->locked = true;
+	trans_set_locked(trans);
 
 	if (unlikely(trans->memory_allocation_failure)) {
 		struct closure cl;
@@ -3089,7 +3089,8 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 		bch2_trans_srcu_unlock(trans);
 
 	trans->last_begin_ip = _RET_IP_;
-	trans->locked = true;
+
+	trans_set_locked(trans);
 
 	if (trans->restarted) {
 		bch2_btree_path_traverse_all(trans);
@@ -3159,7 +3160,6 @@ got_trans:
 	trans->last_begin_time	= local_clock();
 	trans->fn_idx		= fn_idx;
 	trans->locking_wait.task = current;
-	trans->locked		= true;
 	trans->journal_replay_not_finished =
 		unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) &&
 		atomic_inc_not_zero(&c->journal_keys.ref);
@@ -3193,6 +3193,7 @@ got_trans:
 	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 	trans->srcu_lock_time	= jiffies;
 	trans->srcu_held	= true;
+	trans_set_locked(trans);
 
 	closure_init_stack_release(&trans->ref);
 	return trans;
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index d66fff22109a..c51826fd557f 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
 			prt_newline(&buf);
 		}
 
-		bch2_print_string_as_lines(KERN_ERR, buf.buf);
+		bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
 		printbuf_exit(&buf);
 		BUG();
 	}
@@ -792,7 +792,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
 			return bch2_trans_relock_fail(trans, path, &f, trace);
 	}
 
-	trans->locked = true;
+	trans_set_locked(trans);
out:
 	bch2_trans_verify_locks(trans);
 	return 0;
@@ -812,16 +812,14 @@ void bch2_trans_unlock_noassert(struct btree_trans *trans)
 {
 	__bch2_trans_unlock(trans);
 
-	trans->locked = false;
-	trans->last_unlock_ip = _RET_IP_;
+	trans_set_unlocked(trans);
 }
 
 void bch2_trans_unlock(struct btree_trans *trans)
 {
 	__bch2_trans_unlock(trans);
 
-	trans->locked = false;
-	trans->last_unlock_ip = _RET_IP_;
+	trans_set_unlocked(trans);
 }
 
 void bch2_trans_unlock_long(struct btree_trans *trans)
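The new GC root walk above drops btree_root_lock in favour of retrying whenever the transaction reports a restart. For context, a minimal sketch of that restart idiom, using the bcachefs helpers already visible in the hunk; read_root_work() is a hypothetical stand-in for the iterator setup and mark, not a real function:

/* Hypothetical work function standing in for the body of the loop. */
static int read_root_work(struct btree_trans *trans);

static int walk_root_with_restarts(struct btree_trans *trans)
{
	int ret;

	do {
		bch2_trans_begin(trans);	/* resets state after a restarted attempt */
		ret = read_root_work(trans);
	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));

	return ret;
}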
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 7f41545b9147..75a6274c7d27 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -193,6 +193,28 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
 
 /* lock: */
 
+static inline void trans_set_locked(struct btree_trans *trans)
+{
+	if (!trans->locked) {
+		trans->locked = true;
+		trans->last_unlock_ip = 0;
+
+		trans->pf_memalloc_nofs = (current->flags & PF_MEMALLOC_NOFS) != 0;
+		current->flags |= PF_MEMALLOC_NOFS;
+	}
+}
+
+static inline void trans_set_unlocked(struct btree_trans *trans)
+{
+	if (trans->locked) {
+		trans->locked = false;
+		trans->last_unlock_ip = _RET_IP_;
+
+		if (!trans->pf_memalloc_nofs)
+			current->flags &= ~PF_MEMALLOC_NOFS;
+	}
+}
+
 static inline int __btree_node_lock_nopath(struct btree_trans *trans,
 					   struct btree_bkey_cached_common *b,
 					   enum six_lock_type type,
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 87f485e9c552..48cb1a7d31c5 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -484,6 +484,7 @@ struct btree_trans {
 	bool			lock_may_not_fail:1;
 	bool			srcu_held:1;
 	bool			locked:1;
+	bool			pf_memalloc_nofs:1;
 	bool			write_locked:1;
 	bool			used_mempool:1;
 	bool			in_traverse_all:1;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 743d57eba760..314ee3e0187f 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -805,7 +805,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
 			"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
-			*bucket_gen(ca, bucket_nr),
+			bucket_gen_get(ca, bucket_nr),
 			bch2_data_type_str(bucket_data_type ?: ptr_data_type),
 			ptr->gen,
 			(printbuf_reset(&buf),
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 80ee0be9793e..8ad4be73860c 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
 	return gens->b + b;
 }
 
+static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b)
+{
+	rcu_read_lock();
+	u8 gen = *bucket_gen(ca, b);
+	rcu_read_unlock();
+	return gen;
+}
+
 static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr)
 {
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 74a1e12a7a14..fa1fee05cf8f 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -2073,8 +2073,7 @@ int __init bch2_vfs_init(void)
 {
 	int ret = -ENOMEM;
 
-	bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
-				      SLAB_ACCOUNT);
+	bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
 	if (!bch2_inode_cache)
 		goto err;
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 4ec979b4b23e..4583c9386e8c 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -125,7 +125,7 @@ err_noprint:
 	bch2_bkey_buf_exit(&old, c);
 
 	if (closure_nr_remaining(&cl) != 1) {
-		bch2_trans_unlock(trans);
+		bch2_trans_unlock_long(trans);
 		closure_sync(&cl);
 	}
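The trans_set_locked()/trans_set_unlocked() helpers above open-code a save/restore of PF_MEMALLOC_NOFS so that memory allocations made while btree locks are held cannot recurse into filesystem reclaim. A minimal sketch of the equivalent scoped interface from <linux/sched/mm.h>; my_locked_operation() is a hypothetical caller, not part of this patch:

#include <linux/sched/mm.h>

static void my_locked_operation(void)
{
	/* Allocations in this scope implicitly behave as GFP_NOFS. */
	unsigned int nofs_flags = memalloc_nofs_save();

	/* ... take locks and allocate memory here ... */

	memalloc_nofs_restore(nofs_flags);	/* restore the previous state */
}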
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index de331dec2a99..4ec7e44d6e36 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
 	bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
 }
 
-void bch2_print_string_as_lines(const char *prefix, const char *lines)
+static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
+					 bool nonblocking)
 {
+	bool locked = false;
 	const char *p;
 
 	if (!lines) {
@@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
 		return;
 	}
 
-	console_lock();
+	if (!nonblocking) {
+		console_lock();
+		locked = true;
+	} else {
+		locked = console_trylock();
+	}
+
 	while (1) {
 		p = strchrnul(lines, '\n');
 		printk("%s%.*s\n", prefix, (int) (p - lines), lines);
@@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
 			break;
 		lines = p + 1;
 	}
-	console_unlock();
+
+	if (locked)
+		console_unlock();
+}
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines)
+{
+	return __bch2_print_string_as_lines(prefix, lines, false);
+}
+
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines)
+{
+	return __bch2_print_string_as_lines(prefix, lines, true);
 }
 
 int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 5d2c470a49ac..5b0533ec4c7e 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
 void bch2_prt_u64_base2(struct printbuf *, u64);
 
 void bch2_print_string_as_lines(const char *prefix, const char *lines);
+void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines);
 
 typedef DARRAY(unsigned long) bch_stacktrace;
 int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
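The nonblocking variant added above exists so break_cycle() can emit its deadlock report from a context where sleeping on console_lock() would be unsafe. A minimal sketch of the trylock idiom it uses; emit_report_nonblocking() is illustrative only, not bcachefs code:

#include <linux/console.h>
#include <linux/printk.h>

static void emit_report_nonblocking(const char *msg)
{
	bool locked = console_trylock();	/* never sleeps */

	/* printk() is valid whether or not we hold the console lock. */
	printk(KERN_ERR "%s\n", msg);

	if (locked)
		console_unlock();
}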
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 38cdb8875e8e..cabb558dbdaa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2856,6 +2856,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
 	if (ret)
 		return ret;
 
+	spin_lock_init(&fs_info->extent_map_shrinker_lock);
+
 	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
 	if (ret)
 		return ret;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 744e8952abb0..b4c9a6aa118c 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1028,7 +1028,14 @@ out_free_pre:
 	return ret;
 }
 
-static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
+struct btrfs_em_shrink_ctx {
+	long nr_to_scan;
+	long scanned;
+	u64 last_ino;
+	u64 last_root;
+};
+
+static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
 {
 	const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
 	struct extent_map_tree *tree = &inode->extent_tree;
@@ -1057,14 +1064,25 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
 	if (!down_read_trylock(&inode->i_mmap_lock))
 		return 0;
 
-	write_lock(&tree->lock);
+	/*
+	 * We want to be fast because we can be called from any path trying to
+	 * allocate memory, so if the lock is busy we don't want to spend time
+	 * waiting for it - either some task is about to do IO for the inode or
+	 * we may have another task shrinking extent maps, here in this code, so
+	 * skip this inode.
+	 */
+	if (!write_trylock(&tree->lock)) {
+		up_read(&inode->i_mmap_lock);
+		return 0;
+	}
+
 	node = rb_first_cached(&tree->map);
 	while (node) {
 		struct extent_map *em;
 
 		em = rb_entry(node, struct extent_map, rb_node);
 		node = rb_next(node);
-		(*scanned)++;
+		ctx->scanned++;
 
 		if (em->flags & EXTENT_FLAG_PINNED)
 			goto next;
@@ -1085,16 +1103,18 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
 			free_extent_map(em);
 		nr_dropped++;
next:
-		if (*scanned >= nr_to_scan)
+		if (ctx->scanned >= ctx->nr_to_scan)
 			break;
 
 		/*
-		 * Restart if we had to reschedule, and any extent maps that were
-		 * pinned before may have become unpinned after we released the
-		 * lock and took it again.
+		 * Stop if we need to reschedule or there's contention on the
+		 * lock. This is to avoid slowing other tasks trying to take the
+		 * lock and because the shrinker might be called during a memory
+		 * allocation path and we want to avoid taking a very long time
+		 * and slowing down all sorts of tasks.
 		 */
-		if (cond_resched_rwlock_write(&tree->lock))
-			node = rb_first_cached(&tree->map);
+		if (need_resched() || rwlock_needbreak(&tree->lock))
+			break;
 	}
 	write_unlock(&tree->lock);
 	up_read(&inode->i_mmap_lock);
@@ -1102,25 +1122,30 @@ next:
 	return nr_dropped;
 }
 
-static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan)
+static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_inode *inode;
 	long nr_dropped = 0;
-	u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1;
+	u64 min_ino = ctx->last_ino + 1;
 
 	inode = btrfs_find_first_inode(root, min_ino);
 	while (inode) {
-		nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan);
+		nr_dropped += btrfs_scan_inode(inode, ctx);
 
 		min_ino = btrfs_ino(inode) + 1;
-		fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode);
-		iput(&inode->vfs_inode);
+		ctx->last_ino = btrfs_ino(inode);
+		btrfs_add_delayed_iput(inode);
 
-		if (*scanned >= nr_to_scan)
+		if (ctx->scanned >= ctx->nr_to_scan)
+			break;
+
+		/*
+		 * We may be called from memory allocation paths, so we don't
+		 * want to take too much time and slow down tasks.
+		 */
+		if (need_resched())
 			break;
 
-		cond_resched();
 		inode = btrfs_find_first_inode(root, min_ino);
 	}
@@ -1132,14 +1157,14 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan)
 		 * inode if there is one or we will find out this was the last
 		 * one and move to the next root.
 		 */
-		fs_info->extent_map_shrinker_last_root = btrfs_root_id(root);
+		ctx->last_root = btrfs_root_id(root);
 	} else {
 		/*
 		 * No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
 		 * that when processing the next root we start from its first inode.
 		 */
-		fs_info->extent_map_shrinker_last_ino = 0;
-		fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1;
+		ctx->last_ino = 0;
+		ctx->last_root = btrfs_root_id(root) + 1;
 	}
 
 	return nr_dropped;
@@ -1147,19 +1172,41 @@
 long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
 {
-	const u64 start_root_id = fs_info->extent_map_shrinker_last_root;
-	u64 next_root_id = start_root_id;
+	struct btrfs_em_shrink_ctx ctx;
+	u64 start_root_id;
+	u64 next_root_id;
 	bool cycled = false;
 	long nr_dropped = 0;
-	long scanned = 0;
+
+	ctx.scanned = 0;
+	ctx.nr_to_scan = nr_to_scan;
+
+	/*
+	 * In case we have multiple tasks running this shrinker, make the next
+	 * one start from the next inode in case it starts before we finish.
+	 */
+	spin_lock(&fs_info->extent_map_shrinker_lock);
+	ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
+	fs_info->extent_map_shrinker_last_ino++;
+	ctx.last_root = fs_info->extent_map_shrinker_last_root;
+	spin_unlock(&fs_info->extent_map_shrinker_lock);
+
+	start_root_id = ctx.last_root;
+	next_root_id = ctx.last_root;
 
 	if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
 		s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
 
-		trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr);
+		trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
							   nr, ctx.last_root,
							   ctx.last_ino);
 	}
 
-	while (scanned < nr_to_scan) {
+	/*
+	 * We may be called from memory allocation paths, so we don't want to
+	 * take too much time and slow down tasks, so stop if we need to
+	 * reschedule.
+	 */
+	while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
 		struct btrfs_root *root;
 		unsigned long count;
@@ -1171,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
 		spin_unlock(&fs_info->fs_roots_radix_lock);
 		if (start_root_id > 0 && !cycled) {
 			next_root_id = 0;
-			fs_info->extent_map_shrinker_last_root = 0;
-			fs_info->extent_map_shrinker_last_ino = 0;
+			ctx.last_root = 0;
+			ctx.last_ino = 0;
 			cycled = true;
 			continue;
 		}
@@ -1186,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
 			continue;
 
 		if (is_fstree(btrfs_root_id(root)))
-			nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan);
+			nr_dropped += btrfs_scan_root(root, &ctx);
 
 		btrfs_put_root(root);
 	}
 
+	/*
+	 * In case of multiple tasks running this extent map shrinking code this
+	 * isn't perfect but it's simple and silences things like KCSAN. It's
+	 * not possible to know which task made more progress because we can
+	 * cycle back to the first root and first inode if it's not the first
+	 * time the shrinker ran, see the above logic. Also a task that started
+	 * later may finish earlier than another task while having made less
+	 * progress. So make this simple and update to the progress of the last
+	 * task that finished, with the occasional possibility of having two
+	 * consecutive runs of the shrinker process the same inodes.
+	 */
+	spin_lock(&fs_info->extent_map_shrinker_lock);
+	fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
+	fs_info->extent_map_shrinker_last_root = ctx.last_root;
+	spin_unlock(&fs_info->extent_map_shrinker_lock);
+
 	if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
 		s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
 
-		trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr);
+		trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
							  nr, ctx.last_root,
							  ctx.last_ino);
 	}
 
 	return nr_dropped;
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 89f0650631cd..833dc3fe0a38 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -630,6 +630,7 @@ struct btrfs_fs_info {
 	s32 delalloc_batch;
 
 	struct percpu_counter evictable_extent_maps;
+	spinlock_t extent_map_shrinker_lock;
 	u64 extent_map_shrinker_last_root;
 	u64 extent_map_shrinker_last_ino;
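The btrfs changes above replace direct updates of the shrinker cursor fields with a spinlock-protected snapshot/publish scheme, so concurrent shrinker runs mostly scan disjoint inodes. A generic sketch of that hand-off pattern; struct scan_cursor and its helpers are illustrative, not btrfs API (the lock would be set up with spin_lock_init() at mount, as the disk-io.c hunk does):

#include <linux/spinlock.h>
#include <linux/types.h>

struct scan_cursor {
	spinlock_t lock;
	u64 last_ino;
	u64 last_root;
};

/* Take a private snapshot and nudge the shared cursor forward so a
 * concurrent run starts past the inode we are about to scan. */
static void cursor_snapshot(struct scan_cursor *cur, u64 *ino, u64 *root)
{
	spin_lock(&cur->lock);
	*ino = cur->last_ino;
	cur->last_ino++;
	*root = cur->last_root;
	spin_unlock(&cur->lock);
}

/* Publish our final position; last writer wins, which may occasionally
 * make two consecutive runs revisit the same inodes. */
static void cursor_publish(struct scan_cursor *cur, u64 ino, u64 root)
{
	spin_lock(&cur->lock);
	cur->last_ino = ino;
	cur->last_root = root;
	spin_unlock(&cur->lock);
}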