diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-28 08:58:58 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-28 08:58:58 -0700 |
| commit | a90f1b6ad6649d553c9d76f50a42e4ba5783164b (patch) | |
| tree | c09b83b82586a8ff680db5a86081704c74bf1eb4 | |
| parent | Merge tag 'xfs-merge-6.17' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux (diff) | |
| parent | gfs2: No more self recovery (diff) | |
| download | linux-a90f1b6ad6649d553c9d76f50a42e4ba5783164b.tar.gz linux-a90f1b6ad6649d553c9d76f50a42e4ba5783164b.zip | |
Merge tag 'gfs2-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull gfs2 updates from Andreas Gruenbacher:
- Prevent cluster nodes from trying to recover their own filesystems
  during a withdraw
- Add two missing migrate_folio aops and an additional exhash directory
  consistency check (both triggered by syzbot bug reports)
- Sanitize how dlm results are processed and clean up a few quirks in
  the glock code
- Minor stuff: Get rid of the GIF_ALLOC_FAILED flag; use SECTOR_SIZE
  and SECTOR_SHIFT
* tag 'gfs2-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  gfs2: No more self recovery
  gfs2: Validate i_depth for exhash directories
  gfs2: Set .migrate_folio in gfs2_{rgrp,meta}_aops
  gfs2: a minor finish_xmote cleanup
  gfs2: simplify finish_xmote
  gfs2: sanitize the gdlm_ast -> finish_xmote interface
  gfs2: Minor do_xmote cancelation fix
  gfs2: Remove GIF_ALLOC_FAILED flag
  gfs2: Use SECTOR_SIZE and SECTOR_SHIFT
| -rw-r--r-- | fs/gfs2/dir.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/glock.c | 43 | ||||
| -rw-r--r-- | fs/gfs2/glock.h | 10 | ||||
| -rw-r--r-- | fs/gfs2/glops.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/incore.h | 1 | ||||
| -rw-r--r-- | fs/gfs2/inode.c | 7 | ||||
| -rw-r--r-- | fs/gfs2/inode.h | 6 | ||||
| -rw-r--r-- | fs/gfs2/lock_dlm.c | 9 | ||||
| -rw-r--r-- | fs/gfs2/meta_io.c | 10 | ||||
| -rw-r--r-- | fs/gfs2/ops_fstype.c | 12 | ||||
| -rw-r--r-- | fs/gfs2/super.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/util.c | 31 |
12 files changed, 74 insertions, 73 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index dbf1aede744c..509e2f0d97e7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -60,6 +60,7 @@ #include <linux/crc32.h> #include <linux/vmalloc.h> #include <linux/bio.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -912,7 +913,6 @@ static int dir_make_exhash(struct inode *inode) struct qstr args; struct buffer_head *bh, *dibh; struct gfs2_leaf *leaf; - int y; u32 x; __be64 *lp; u64 bn; @@ -979,9 +979,7 @@ static int dir_make_exhash(struct inode *inode) i_size_write(inode, sdp->sd_sb.sb_bsize / 2); gfs2_add_inode_blocks(&dip->i_inode, 1); dip->i_diskflags |= GFS2_DIF_EXHASH; - - for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; - dip->i_depth = y; + dip->i_depth = ilog2(sdp->sd_hash_ptrs); gfs2_dinode_out(dip, dibh->b_data); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ba25b884169e..b6fd1cb17de7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -590,35 +590,31 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh; - unsigned state = ret & LM_OUT_ST_MASK; - trace_gfs2_glock_state_change(gl, state); - state_change(gl, state); - gh = find_first_waiter(gl); + if (!(ret & ~LM_OUT_ST_MASK)) { + unsigned state = ret & LM_OUT_ST_MASK; + + trace_gfs2_glock_state_change(gl, state); + state_change(gl, state); + } + /* Demote to UN request arrived during demote to SH or DF */ if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && - state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state == LM_ST_UNLOCKED) gl->gl_target = LM_ST_UNLOCKED; /* Check for state != intended state */ - if (unlikely(state != gl->gl_target)) { - if (gh && (ret & LM_OUT_CANCELED)) - gfs2_holder_wake(gh); + if (unlikely(gl->gl_state != gl->gl_target)) { + struct gfs2_holder *gh = find_first_waiter(gl); + if 
(gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { if (ret & LM_OUT_CANCELED) { list_del_init(&gh->gh_list); trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); gl->gl_target = gl->gl_state; - gh = find_first_waiter(gl); - if (gh) { - gl->gl_target = gh->gh_state; - if (do_promote(gl)) - goto out; - do_xmote(gl, gh, gl->gl_target); - return; - } goto out; } /* Some error or failed "try lock" - report it */ @@ -629,7 +625,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) goto out; } } - switch(state) { + switch(gl->gl_state) { /* Unlocked due to conversion deadlock, try again */ case LM_ST_UNLOCKED: do_xmote(gl, gh, gl->gl_target); @@ -640,8 +636,10 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) do_xmote(gl, gh, LM_ST_UNLOCKED); break; default: /* Everything else */ - fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", - gl->gl_target, state); + fs_err(gl->gl_name.ln_sbd, + "glock %u:%llu requested=%u ret=%u\n", + gl->gl_name.ln_type, gl->gl_name.ln_number, + gl->gl_req, ret); GLOCK_BUG_ON(gl, 1); } return; @@ -650,7 +648,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) /* Fast path - we got what we asked for */ if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) gfs2_demote_wake(gl); - if (state != LM_ST_UNLOCKED) { + if (gl->gl_state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { int rv; @@ -802,7 +800,8 @@ skip_inval: * We skip telling dlm to do the locking, so we won't get a * reply that would otherwise clear GLF_LOCK. So we clear it here. 
*/ - clear_bit(GLF_LOCK, &gl->gl_flags); + if (!test_bit(GLF_CANCELING, &gl->gl_flags)) + clear_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); return; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c171f745650f..9339a3bff6ee 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -92,12 +92,22 @@ enum { * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. * + * LM_OUT_TRY_AGAIN + * The trylock request failed. + * + * LM_OUT_DEADLOCK + * The lock request failed because it would deadlock. + * * LM_OUT_CANCELED * The lock request was canceled. * + * LM_OUT_ERROR + * The lock request timed out or failed. */ #define LM_OUT_ST_MASK 0x00000003 +#define LM_OUT_TRY_AGAIN 0x00000020 +#define LM_OUT_DEADLOCK 0x00000010 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ERROR 0x00000004 diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index cebd66b22694..fe0faad4892f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -11,6 +11,7 @@ #include <linux/bio.h> #include <linux/posix_acl.h> #include <linux/security.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -450,6 +451,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) gfs2_consist_inode(ip); return -EIO; } + if ((ip->i_diskflags & GFS2_DIF_EXHASH) && + depth < ilog2(sdp->sd_hash_ptrs)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0a41c4e76b32..d4ad82f47eee 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -375,7 +375,6 @@ struct gfs2_glock { enum { GIF_QD_LOCKED = 1, - GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 187d789a8f1e..8760e7e20c9d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -444,11 +444,9 @@ static void gfs2_final_release_pages(struct 
gfs2_inode *ip) struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; - if (unlikely(!gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (unlikely(!gl)) return; - } truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); @@ -902,7 +900,6 @@ fail_gunlock3: fail_gunlock2: gfs2_glock_put(io_gl); fail_dealloc_inode: - set_bit(GIF_ALLOC_FAILED, &ip->i_flags); dealloc_error = 0; if (ip->i_eattr) dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index eafe123617e6..811a0bd3792c 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -44,17 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { - inode->i_blocks = blocks << (inode->i_blkbits - 9); + inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { - return inode->i_blocks >> (inode->i_blkbits - 9); + return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { - change <<= inode->i_blkbits - 9; + change <<= inode->i_blkbits - SECTOR_SHIFT; gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 7cb9d216d8bb..cee5d199d2d8 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -119,7 +119,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl) static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; - unsigned ret = gl->gl_state; + unsigned ret; /* If the glock is dead, we only react to a dlm_unlock() reply. 
*/ if (__lockref_is_dead(&gl->gl_lockref) && @@ -139,13 +139,16 @@ static void gdlm_ast(void *arg) gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ - ret |= LM_OUT_CANCELED; + ret = LM_OUT_CANCELED; goto out; case -EAGAIN: /* Try lock fails */ + ret = LM_OUT_TRY_AGAIN; + goto out; case -EDEADLK: /* Deadlock detected */ + ret = LM_OUT_DEADLOCK; goto out; case -ETIMEDOUT: /* Canceled due to timeout */ - ret |= LM_OUT_ERROR; + ret = LM_OUT_ERROR; goto out; case 0: /* Success */ break; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 9dc8885c95d0..7fb11ff71b5a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -103,6 +103,7 @@ const struct address_space_operations gfs2_meta_aops = { .invalidate_folio = block_invalidate_folio, .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; const struct address_space_operations gfs2_rgrp_aops = { @@ -110,6 +111,7 @@ const struct address_space_operations gfs2_rgrp_aops = { .invalidate_folio = block_invalidate_folio, .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; /** @@ -228,7 +230,7 @@ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) struct bio *bio; bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO); - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT); while (num > 0) { bh = *bhs; if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) { @@ -443,11 +445,9 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; - if (!ip->i_gl) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. 
*/ + if (!ip->i_gl) return; - } gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 85c491fcf1a3..a19d7e431c8e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -163,7 +163,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + if (sb->sb_bsize < SECTOR_SIZE || sb->sb_bsize > PAGE_SIZE || (sb->sb_bsize & (sb->sb_bsize - 1))) { pr_warn("Invalid block size\n"); return -EINVAL; @@ -224,8 +224,8 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) if (unlikely(!sb)) return -ENOMEM; err = bdev_rw_virt(sdp->sd_vfs->s_bdev, - sector * (sdp->sd_vfs->s_blocksize >> 9), sb, PAGE_SIZE, - REQ_OP_READ | REQ_META); + sector << (sdp->sd_vfs->s_blocksize_bits - SECTOR_SHIFT), + sb, PAGE_SIZE, REQ_OP_READ | REQ_META); if (err) { pr_warn("error %d reading superblock\n", err); kfree(sb); @@ -257,7 +257,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) return error; } - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(u64); @@ -1155,12 +1155,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) /* Set up the buffer cache and fill in some fake block size values to allow us to read-in the on-disk superblock. 
*/ - sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512); + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, SECTOR_SIZE); error = -EINVAL; if (!sdp->sd_sb.sb_bsize) goto fail_free; sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7c518c4ff638..b42e2110084b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -487,11 +487,9 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; - if (unlikely(!ip->i_gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (unlikely(!ip->i_gl)) return; - } if (gfs2_withdrawing_or_withdrawn(sdp)) return; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index d5a1e63fa257..24864a66074b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -232,32 +232,23 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) */ ret = gfs2_glock_nq(&sdp->sd_live_gh); + gfs2_glock_put(live_gl); /* drop extra reference we acquired */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + /* * If we actually got the "live" lock in EX mode, there are no other - * nodes available to replay our journal. So we try to replay it - * ourselves. We hold the "live" glock to prevent other mounters - * during recovery, then just dequeue it and reacquire it in our - * normal SH mode. Just in case the problem that caused us to - * withdraw prevents us from recovering our journal (e.g. io errors - * and such) we still check if the journal is clean before proceeding - * but we may wait forever until another mounter does the recovery. + * nodes available to replay our journal. */ if (ret == 0) { - fs_warn(sdp, "No other mounters found. 
Trying to recover our " - "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); - if (gfs2_recover_journal(sdp->sd_jdesc, 1)) - fs_warn(sdp, "Unable to recover our journal jid %d.\n", - sdp->sd_lockstruct.ls_jid); - gfs2_glock_dq_wait(&sdp->sd_live_gh); - gfs2_holder_reinit(LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, - &sdp->sd_live_gh); - gfs2_glock_nq(&sdp->sd_live_gh); + fs_warn(sdp, "No other mounters found.\n"); + /* + * We are about to release the lockspace. By keeping live_gl + * locked here, we ensure that the next mounter coming along + * will be a "first" mounter which will perform recovery. + */ + goto skip_recovery; } - gfs2_glock_put(live_gl); /* drop extra reference we acquired */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - /* * At this point our journal is evicted, so we need to get a new inode * for it. Once done, we need to call gfs2_find_jhead which |
