aboutsummaryrefslogtreecommitdiff
path: root/fs/f2fs/node.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 10:48:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 10:48:47 -0700
commit6abaa83c7352b31450d7e8c173f674324c16b02b (patch)
treecea534b220e9635bb2af785a954292e416ebdca9 /fs/f2fs/node.c
parent0961f0c00e69672a8e4a2e591355567dbda44389 (diff)
parent9605f75cf36e0bcc0f4ada07b5be712d30107607 (diff)
Merge tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this cycle, we've addressed some performance issues such as lock contention, misbehaving compress_cache, allowing extent_cache for compressed files, and new sysfs to adjust ra_size for fadvise. In order to diagnose the performance issues quickly, we also added an iostat which shows the IO latencies periodically. On the stability side, we've found two memory leakage cases in the error path in compression flow. And, we've also fixed various corner cases in fiemap, quota, checkpoint=disable, zstd, and so on. Enhancements: - avoid long checkpoint latency by releasing nat_tree_lock - collect and show iostats periodically - support extent_cache for compressed files - add a sysfs entry to manage ra_size given fadvise(POSIX_FADV_SEQUENTIAL) - report f2fs GC status via sysfs - add discard_unit=%s in mount option to handle zoned device Bug fixes: - fix two memory leakages when an error happens in the compressed IO flow - fix commpress_cache to get the right LBA - fix fiemap to deal with compressed case correctly - fix wrong EIO returns due to SBI_NEED_FSCK - fix missing writes when enabling checkpoint back - fix quota deadlock - fix zstd level mount option In addition to the above major updates, we've cleaned up several code paths such as dio, unnecessary operations, debugfs/f2fs/status, sanity check, and typos" * tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (46 commits) f2fs: should put a page beyond EOF when preparing a write f2fs: deallocate compressed pages when error happens f2fs: enable realtime discard iff device supports discard f2fs: guarantee to write dirty data when enabling checkpoint back f2fs: fix to unmap pages from userspace process in punch_hole() f2fs: fix unexpected ENOENT comes from f2fs_map_blocks() f2fs: fix to account missing .skipped_gc_rwsem f2fs: adjust unlock order for cleanup f2fs: Don't create discard thread when device doesn't support realtime discard f2fs: rebuild nat_bits during umount f2fs: introduce periodic iostat io latency traces f2fs: separate out iostat feature f2fs: compress: do sanity check on cluster f2fs: fix description about main_blkaddr node f2fs: convert S_IRUGO to 0444 f2fs: fix to keep compatibility of fault injection interface f2fs: support fault injection for f2fs_kmem_cache_alloc() f2fs: compress: allow write compress released file after truncate to zero f2fs: correct comment in segment.h f2fs: improve sbi status info in debugfs/f2fs/status ...
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--fs/f2fs/node.c165
1 files changed, 126 insertions, 39 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..e863136081b4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -17,6 +17,7 @@
#include "node.h"
#include "segment.h"
#include "xattr.h"
+#include "iostat.h"
#include <trace/events/f2fs.h>
#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
@@ -162,14 +163,13 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
-static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
+static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
+ nid_t nid, bool no_fail)
{
struct nat_entry *new;
- if (no_fail)
- new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
- else
- new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
+ new = f2fs_kmem_cache_alloc(nat_entry_slab,
+ GFP_F2FS_ZERO, no_fail, sbi);
if (new) {
nat_set_nid(new, nid);
nat_reset_flag(new);
@@ -242,7 +242,8 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
- head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
+ head = f2fs_kmem_cache_alloc(nat_entry_set_slab,
+ GFP_NOFS, true, NULL);
INIT_LIST_HEAD(&head->entry_list);
INIT_LIST_HEAD(&head->set_list);
@@ -329,7 +330,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
unsigned long flags;
unsigned int seq_id;
- fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab,
+ GFP_NOFS, true, NULL);
get_page(page);
fn->page = page;
@@ -428,7 +430,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *new, *e;
- new = __alloc_nat_entry(nid, false);
+ new = __alloc_nat_entry(sbi, nid, false);
if (!new)
return;
@@ -451,7 +453,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
- struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
+ struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
@@ -552,7 +554,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +566,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+ if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ down_read(&curseg->journal_rwsem);
+ } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
@@ -832,6 +843,26 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
dn->ofs_in_node = offset[level];
dn->node_page = npage[level];
dn->data_blkaddr = f2fs_data_blkaddr(dn);
+
+ if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
+ f2fs_sb_has_readonly(sbi)) {
+ unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn);
+ block_t blkaddr;
+
+ if (!c_len)
+ goto out;
+
+ blkaddr = f2fs_data_blkaddr(dn);
+ if (blkaddr == COMPRESS_ADDR)
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + 1);
+
+ f2fs_update_extent_tree_range_compressed(dn->inode,
+ index, blkaddr,
+ F2FS_I(dn->inode)->i_cluster_size,
+ c_len);
+ }
+out:
return 0;
release_pages:
@@ -1321,7 +1352,8 @@ static int read_node_page(struct page *page, int op_flags)
if (err)
return err;
- if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
+ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) ||
is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
@@ -2181,6 +2213,24 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
}
}
+bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int i;
+ bool ret = true;
+
+ down_read(&nm_i->nat_tree_lock);
+ for (i = 0; i < nm_i->nat_blocks; i++) {
+ if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
+ ret = false;
+ break;
+ }
+ }
+ up_read(&nm_i->nat_tree_lock);
+
+ return ret;
+}
+
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
bool set, bool build)
{
@@ -2222,7 +2272,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
if (unlikely(f2fs_check_nid_range(sbi, nid)))
return false;
- i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+ i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL);
i->nid = nid;
i->state = FREE_NID;
@@ -2812,7 +2862,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- ne = __alloc_nat_entry(nid, true);
+ ne = __alloc_nat_entry(sbi, nid, true);
__init_nat_entry(nm_i, ne, &raw_ne, true);
}
@@ -2852,7 +2902,23 @@ add_out:
list_add_tail(&nes->set_list, head);
}
-static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
+ unsigned int valid)
+{
+ if (valid == 0) {
+ __set_bit_le(nat_ofs, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
+ return;
+ }
+
+ __clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ __set_bit_le(nat_ofs, nm_i->full_nat_bits);
+ else
+ __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
+}
+
+static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
struct page *page)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2861,7 +2927,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
int valid = 0;
int i = 0;
- if (!enabled_nat_bits(sbi, NULL))
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
return;
if (nat_index == 0) {
@@ -2872,17 +2938,36 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
- if (valid == 0) {
- __set_bit_le(nat_index, nm_i->empty_nat_bits);
- __clear_bit_le(nat_index, nm_i->full_nat_bits);
- return;
+
+ __update_nat_bits(nm_i, nat_index, valid);
+}
+
+void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_ofs;
+
+ down_read(&nm_i->nat_tree_lock);
+
+ for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
+ unsigned int valid = 0, nid_ofs = 0;
+
+ /* handle nid zero due to it should never be used */
+ if (unlikely(nat_ofs == 0)) {
+ valid = 1;
+ nid_ofs = 1;
+ }
+
+ for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
+ if (!test_bit_le(nid_ofs,
+ nm_i->free_nid_bitmap[nat_ofs]))
+ valid++;
+ }
+
+ __update_nat_bits(nm_i, nat_ofs, valid);
}
- __clear_bit_le(nat_index, nm_i->empty_nat_bits);
- if (valid == NAT_ENTRY_PER_BLOCK)
- __set_bit_le(nat_index, nm_i->full_nat_bits);
- else
- __clear_bit_le(nat_index, nm_i->full_nat_bits);
+ up_read(&nm_i->nat_tree_lock);
}
static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2901,7 +2986,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (enabled_nat_bits(sbi, cpc) ||
+ if ((cpc->reason & CP_UMOUNT) ||
!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
@@ -2948,7 +3033,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
up_write(&curseg->journal_rwsem);
} else {
- __update_nat_bits(sbi, start_nid, page);
+ update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
}
@@ -2979,7 +3064,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* during unmount, let's flush nat_bits before checking
* nat_cnt[DIRTY_NAT].
*/
- if (enabled_nat_bits(sbi, cpc)) {
+ if (cpc->reason & CP_UMOUNT) {
down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
up_write(&nm_i->nat_tree_lock);
@@ -2995,7 +3080,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (enabled_nat_bits(sbi, cpc) ||
+ if (cpc->reason & CP_UMOUNT ||
!__has_cursum_space(journal,
nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
remove_nats_in_journal(sbi);
@@ -3032,15 +3117,18 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
__u64 cp_ver = cur_cp_version(ckpt);
block_t nat_bits_addr;
- if (!enabled_nat_bits(sbi, NULL))
- return 0;
-
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
nm_i->nat_bits = f2fs_kvzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ return 0;
+
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
@@ -3057,13 +3145,12 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
- disable_nat_bits(sbi, true);
+ clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+ f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
+ cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
return 0;
}
- nm_i->full_nat_bits = nm_i->nat_bits + 8;
- nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
-
f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
@@ -3074,7 +3161,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
unsigned int i = 0;
nid_t nid, last_nid;
- if (!enabled_nat_bits(sbi, NULL))
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
return;
for (i = 0; i < nm_i->nat_blocks; i++) {