diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-29 15:24:58 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-29 15:24:58 -0700 |
| commit | a769648f464c9f453b3dc5c2bb8559b28c5d78a1 (patch) | |
| tree | 9b50cb3da23488c6b298278551a662b7f4ba2f06 | |
| parent | Merge tag 'erofs-for-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff) | |
| parent | dlm: check for undefined release_option values (diff) | |
| download | linux-a769648f464c9f453b3dc5c2bb8559b28c5d78a1.tar.gz linux-a769648f464c9f453b3dc5c2bb8559b28c5d78a1.zip | |
Merge tag 'dlm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm
Pull dlm updates from David Teigland:
"This adds a dlm_release_lockspace() flag to request that node-failure
recovery be performed for the node leaving the lockspace.
The implementation of this flag requires coordination with userland
clustering components. It's been requested for use by GFS2"
* tag 'dlm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
dlm: check for undefined release_option values
dlm: handle release_option as unsigned
dlm: move to rinfo for all middle conversion cases
dlm: handle invalid lockspace member remove
dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release
dlm: add new configfs entry release_recover for lockspace members
dlm: add new RELEASE_RECOVER uevent attribute for release_lockspace
dlm: use defines for force values in dlm_release_lockspace
dlm: check for defined force value in dlm_lockspace_release
| -rw-r--r-- | drivers/md/md-cluster.c | 4 | ||||
| -rw-r--r-- | fs/dlm/config.c | 64 | ||||
| -rw-r--r-- | fs/dlm/config.h | 2 | ||||
| -rw-r--r-- | fs/dlm/lock.c | 2 | ||||
| -rw-r--r-- | fs/dlm/lockspace.c | 46 | ||||
| -rw-r--r-- | fs/dlm/member.c | 27 | ||||
| -rw-r--r-- | fs/dlm/recover.c | 2 | ||||
| -rw-r--r-- | fs/dlm/user.c | 6 | ||||
| -rw-r--r-- | fs/gfs2/lock_dlm.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/stack_user.c | 2 | ||||
| -rw-r--r-- | include/linux/dlm.h | 33 |
11 files changed, 153 insertions, 39 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 5497eaee96e7..6e9a0045f0ff 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -979,7 +979,7 @@ err: lockres_free(cinfo->resync_lockres); lockres_free(cinfo->bitmap_lockres); if (cinfo->lockspace) - dlm_release_lockspace(cinfo->lockspace, 2); + dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL); mddev->cluster_info = NULL; kfree(cinfo); return ret; @@ -1042,7 +1042,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->resync_lockres); lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); - dlm_release_lockspace(cinfo->lockspace, 2); + dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL); kfree(cinfo); return 0; } diff --git a/fs/dlm/config.c b/fs/dlm/config.c index a23fd524a6ee..a0d75b5c83c6 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -26,6 +26,7 @@ /* * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/nodeid (refers to <node>) * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight + * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/release_recover * /config/dlm/<cluster>/comms/<comm>/nodeid (refers to <comm>) * /config/dlm/<cluster>/comms/<comm>/local * /config/dlm/<cluster>/comms/<comm>/addr (write only) @@ -267,6 +268,7 @@ enum { enum { NODE_ATTR_NODEID = 0, NODE_ATTR_WEIGHT, + NODE_ATTR_RELEASE_RECOVER, }; struct dlm_clusters { @@ -280,6 +282,8 @@ struct dlm_spaces { struct dlm_space { struct config_group group; struct list_head members; + struct list_head members_gone; + int members_gone_count; struct mutex members_lock; int members_count; struct dlm_nodes *nds; @@ -310,6 +314,14 @@ struct dlm_node { int weight; int new; int comm_seq; /* copy of cm->seq when nd->nodeid is set */ + unsigned int release_recover; +}; + +struct dlm_member_gone { + int nodeid; + unsigned int release_recover; + + struct list_head list; /* space->members_gone */ }; static struct configfs_group_operations clusters_ops = { @@ -480,6 +492,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) configfs_add_default_group(&nds->ns_group, &sp->group); INIT_LIST_HEAD(&sp->members); + INIT_LIST_HEAD(&sp->members_gone); mutex_init(&sp->members_lock); sp->members_count = 0; sp->nds = nds; @@ -587,10 +600,20 @@ static void drop_node(struct config_group *g, struct config_item *i) { struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); struct dlm_node *nd = config_item_to_node(i); + struct dlm_member_gone *mb_gone; + + mb_gone = kzalloc(sizeof(*mb_gone), GFP_KERNEL); + if (!mb_gone) + return; mutex_lock(&sp->members_lock); list_del(&nd->list); sp->members_count--; + + mb_gone->nodeid = nd->nodeid; + mb_gone->release_recover = nd->release_recover; + list_add(&mb_gone->list, &sp->members_gone); + sp->members_gone_count++; mutex_unlock(&sp->members_lock); config_item_put(i); @@ -815,12 +838,34 @@ static ssize_t node_weight_store(struct config_item *item, const char *buf, return len; } +static ssize_t node_release_recover_show(struct config_item *item, char *buf) +{ + struct dlm_node *n = config_item_to_node(item); + + return sprintf(buf, "%u\n", n->release_recover); +} + +static ssize_t node_release_recover_store(struct config_item *item, + const char *buf, size_t len) +{ + struct dlm_node *n = config_item_to_node(item); + int rc; + + rc = kstrtouint(buf, 0, &n->release_recover); + if (rc) + return rc; + + return len; +} + CONFIGFS_ATTR(node_, nodeid); CONFIGFS_ATTR(node_, weight); +CONFIGFS_ATTR(node_, release_recover); static struct configfs_attribute *node_attrs[] = { [NODE_ATTR_NODEID] = &node_attr_nodeid, [NODE_ATTR_WEIGHT] = &node_attr_weight, + [NODE_ATTR_RELEASE_RECOVER] = &node_attr_release_recover, NULL, }; @@ -882,9 +927,10 @@ static void put_comm(struct dlm_comm *cm) int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int *count_out) { + struct dlm_member_gone *mb_gone, *mb_safe; + struct dlm_config_node *nodes, *node; struct dlm_space *sp; struct dlm_node *nd; - struct dlm_config_node *nodes, *node; int rv, count; sp = get_space(lsname); @@ -898,7 +944,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, goto out; } - count = sp->members_count; + count = sp->members_count + sp->members_gone_count; nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS); if (!nodes) { @@ -917,6 +963,20 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, nd->new = 0; } + /* we delay the remove on nodes until here as configfs does + * not support addtional attributes for rmdir(). + */ + list_for_each_entry_safe(mb_gone, mb_safe, &sp->members_gone, list) { + node->nodeid = mb_gone->nodeid; + node->release_recover = mb_gone->release_recover; + node->gone = true; + node++; + + list_del(&mb_gone->list); + sp->members_gone_count--; + kfree(mb_gone); + } + *count_out = count; *nodes_out = nodes; rv = 0; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 13a3d0b26194..4ebd45f75276 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -17,8 +17,10 @@ struct dlm_config_node { int nodeid; int weight; + bool gone; int new; uint32_t comm_seq; + unsigned int release_recover; }; extern const struct rhashtable_params dlm_rhash_rsb_params; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 6dd3a524cd35..be938fdf17d9 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -5576,7 +5576,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, if (rl->rl_status == DLM_LKSTS_CONVERT && middle_conversion(lkb)) { /* We may need to adjust grmode depending on other granted locks. */ - log_limit(ls, "%s %x middle convert gr %d rq %d remote %d %x", + log_rinfo(ls, "%s %x middle convert gr %d rq %d remote %d %x", __func__, lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode, lkb->lkb_nodeid, lkb->lkb_remid); rsb_set_flag(r, RSB_RECOVER_CONVERT); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 1929327ffbe1..ddaa76558706 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -186,12 +186,17 @@ static struct kobj_type dlm_ktype = { static struct kset *dlm_kset; -static int do_uevent(struct dlm_ls *ls, int in) +static int do_uevent(struct dlm_ls *ls, int in, unsigned int release_recover) { - if (in) + char message[512] = {}; + char *envp[] = { message, NULL }; + + if (in) { kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE); - else - kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); + } else { + snprintf(message, 511, "RELEASE_RECOVER=%u", release_recover); + kobject_uevent_env(&ls->ls_kobj, KOBJ_OFFLINE, envp); + } log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving"); @@ -575,7 +580,7 @@ static int new_lockspace(const char *name, const char *cluster, current lockspace members are (via configfs) and then tells the lockspace to start running (via sysfs) in dlm_ls_start(). */ - error = do_uevent(ls, 1); + error = do_uevent(ls, 1, 0); if (error < 0) goto out_recoverd; @@ -592,7 +597,7 @@ static int new_lockspace(const char *name, const char *cluster, return 0; out_members: - do_uevent(ls, 0); + do_uevent(ls, 0, 0); dlm_clear_members(ls); kfree(ls->ls_node_array); out_recoverd: @@ -671,19 +676,20 @@ int dlm_new_user_lockspace(const char *name, const char *cluster, This is because there may be LKBs queued as ASTs that have been unlinked from their RSBs and are pending deletion once the AST has been delivered */ -static int lockspace_busy(struct dlm_ls *ls, int force) +static int lockspace_busy(struct dlm_ls *ls, unsigned int release_option) { struct dlm_lkb *lkb; unsigned long id; int rv = 0; read_lock_bh(&ls->ls_lkbxa_lock); - if (force == 0) { + if (release_option == DLM_RELEASE_NO_LOCKS) { xa_for_each(&ls->ls_lkbxa, id, lkb) { rv = 1; break; } - } else if (force == 1) { + } else if (release_option == DLM_RELEASE_UNUSED) { + /* TODO: handle this UNUSED option as NO_LOCKS in later patch */ xa_for_each(&ls->ls_lkbxa, id, lkb) { if (lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV) { @@ -698,11 +704,11 @@ static int lockspace_busy(struct dlm_ls *ls, int force) return rv; } -static int release_lockspace(struct dlm_ls *ls, int force) +static int release_lockspace(struct dlm_ls *ls, unsigned int release_option) { int busy, rv; - busy = lockspace_busy(ls, force); + busy = lockspace_busy(ls, release_option); spin_lock_bh(&lslist_lock); if (ls->ls_create_count == 1) { @@ -730,8 +736,9 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_device_deregister(ls); - if (force < 3 && dlm_user_daemon_available()) - do_uevent(ls, 0); + if (release_option != DLM_RELEASE_NO_EVENT && + dlm_user_daemon_available()) + do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER)); dlm_recoverd_stop(ls); @@ -782,25 +789,24 @@ static int release_lockspace(struct dlm_ls *ls, int force) * lockspace must continue to function as usual, participating in recoveries, * until this returns. * - * Force has 4 possible values: - * 0 - don't destroy lockspace if it has any LKBs - * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs - * 2 - destroy lockspace regardless of LKBs - * 3 - destroy lockspace as part of a forced shutdown + * See DLM_RELEASE defines for release_option values and their meaning. */ -int dlm_release_lockspace(void *lockspace, int force) +int dlm_release_lockspace(void *lockspace, unsigned int release_option) { struct dlm_ls *ls; int error; + if (release_option > __DLM_RELEASE_MAX) + return -EINVAL; + ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; dlm_put_lockspace(ls); mutex_lock(&ls_lock); - error = release_lockspace(ls, force); + error = release_lockspace(ls, release_option); if (!error) ls_count--; if (!ls_count) diff --git a/fs/dlm/member.c b/fs/dlm/member.c index b0864c93230f..c0f557a80a75 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -478,7 +478,8 @@ static void dlm_lsop_recover_prep(struct dlm_ls *ls) ls->ls_ops->recover_prep(ls->ls_ops_arg); } -static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) +static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb, + unsigned int release_recover) { struct dlm_slot slot; uint32_t seq; @@ -495,7 +496,7 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) error = dlm_comm_seq(memb->nodeid, &seq, false); - if (!error && seq == memb->comm_seq) + if (!release_recover && !error && seq == memb->comm_seq) return; slot.nodeid = memb->nodeid; @@ -552,6 +553,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) struct dlm_member *memb, *safe; struct dlm_config_node *node; int i, error, neg = 0, low = -1; + unsigned int release_recover; /* previously removed members that we've not finished removing need to * count as a negative change so the "neg" recovery steps will happen @@ -569,11 +571,21 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { node = find_config_node(rv, memb->nodeid); - if (node && !node->new) + if (!node) { + log_error(ls, "remove member %d invalid", + memb->nodeid); + return -EFAULT; + } + + if (!node->new && !node->gone) continue; - if (!node) { - log_rinfo(ls, "remove member %d", memb->nodeid); + release_recover = 0; + + if (node->gone) { + release_recover = node->release_recover; + log_rinfo(ls, "remove member %d%s", memb->nodeid, + release_recover ? " (release_recover)" : ""); } else { /* removed and re-added */ log_rinfo(ls, "remove member %d comm_seq %u %u", @@ -584,13 +596,16 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) list_move(&memb->list, &ls->ls_nodes_gone); remove_remote_member(memb->nodeid); ls->ls_num_nodes--; - dlm_lsop_recover_slot(ls, memb); + dlm_lsop_recover_slot(ls, memb, release_recover); } /* add new members to ls_nodes */ for (i = 0; i < rv->nodes_count; i++) { node = &rv->nodes[i]; + if (node->gone) + continue; + if (dlm_is_member(ls, node->nodeid)) continue; error = dlm_add_member(ls, node); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index be4240f09abd..3ac020fb8139 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -842,7 +842,7 @@ static void recover_conversion(struct dlm_rsb *r) */ if (((lkb->lkb_grmode == DLM_LOCK_PR) && (other_grmode == DLM_LOCK_CW)) || ((lkb->lkb_grmode == DLM_LOCK_CW) && (other_grmode == DLM_LOCK_PR))) { - log_limit(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL", + log_rinfo(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL", __func__, lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode, lkb->lkb_nodeid, lkb->lkb_remid, other_lkid, other_grmode); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 5cb3896be826..51daf4acbe31 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -425,7 +425,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) dlm_put_lockspace(ls); if (error) - dlm_release_lockspace(lockspace, 0); + dlm_release_lockspace(lockspace, DLM_RELEASE_NO_LOCKS); else error = ls->ls_device.minor; @@ -436,7 +436,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) { dlm_lockspace_t *lockspace; struct dlm_ls *ls; - int error, force = 0; + int error, force = DLM_RELEASE_NO_LOCKS; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -446,7 +446,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) return -ENOENT; if (params->flags & DLM_USER_LSFLG_FORCEFREE) - force = 2; + force = DLM_RELEASE_NORMAL; lockspace = ls; dlm_put_lockspace(ls); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index ae058b1f9435..4f00af7dd256 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1418,7 +1418,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) return 0; fail_release: - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); fail_free: free_recover_size(ls); fail: @@ -1456,7 +1456,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) release: down_write(&ls->ls_sem); if (ls->ls_dlm) { - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); ls->ls_dlm = NULL; } up_write(&ls->ls_sem); diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 0f045e45fa0c..765105f1ff8a 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -952,7 +952,7 @@ static const struct dlm_lockspace_ops ocfs2_ls_ops = { static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { version_unlock(conn); - dlm_release_lockspace(conn->cc_lockspace, 2); + dlm_release_lockspace(conn->cc_lockspace, DLM_RELEASE_NORMAL); conn->cc_lockspace = NULL; ocfs2_live_connection_drop(conn->cc_private); conn->cc_private = NULL; diff --git a/include/linux/dlm.h b/include/linux/dlm.h index bacda9898f2b..7e7b45b0d097 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -88,12 +88,43 @@ int dlm_new_lockspace(const char *name, const char *cluster, int *ops_result, dlm_lockspace_t **lockspace); /* + * dlm_release_lockspace() release_option values: + * + * DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's) + * exist in the local lockspace. + * + * DLM_RELEASE_UNUSED previous value that is no longer used. + * + * DLM_RELEASE_NORMAL releases the lockspace regardless of any + * locks managed in the local lockspace. + * + * DLM_RELEASE_NO_EVENT release the lockspace regardless of any + * locks managed in the local lockspace, and does not submit + * a leave event to the cluster manager, so other nodes will + * not be notified that the node should be removed from the + * list of lockspace members. + * + * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining + * nodes will handle the removal of the node as if the node + * had failed, e.g. the recover_slot() callback would be used. + */ +#define DLM_RELEASE_NO_LOCKS 0 +#define DLM_RELEASE_UNUSED 1 +#define DLM_RELEASE_NORMAL 2 +#define DLM_RELEASE_NO_EVENT 3 +#define DLM_RELEASE_RECOVER 4 +#define __DLM_RELEASE_MAX DLM_RELEASE_RECOVER + +/* * dlm_release_lockspace * * Stop a lockspace. + * + * release_option: see DLM_RELEASE values above. */ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, + unsigned int release_option); /* * dlm_lock |
