aboutsummaryrefslogtreecommitdiffstats
path: root/net/devlink
diff options
context:
space:
mode:
author Carolina Jubran <cjubran@nvidia.com> 2025-06-29 17:21:32 +0300
committer Jakub Kicinski <kuba@kernel.org> 2025-07-02 15:39:05 -0700
commit 566e8f108fc7847f2a8676ec6a101d37b7dd0fb4 (patch)
tree 54459760ece46729169c20a5c2848d8c9dc69589 /net/devlink
parent netlink: introduce type-checking attribute iteration for nlmsg (diff)
download linux-566e8f108fc7847f2a8676ec6a101d37b7dd0fb4.tar.gz
linux-566e8f108fc7847f2a8676ec6a101d37b7dd0fb4.zip
devlink: Extend devlink rate API with traffic classes bandwidth management
Introduce support for specifying relative bandwidth shares between
traffic classes (TC) in the devlink-rate API. This new option allows
users to allocate bandwidth across multiple traffic classes in a
single command.

This feature provides more granular control over traffic management,
especially for scenarios requiring Enhanced Transmission Selection.

Users can now define a relative bandwidth share for each traffic class.
For example, assigning share values of 20 to TC0 (TCP/UDP) and 80 to TC5
(RoCE) will result in TC0 receiving 20% and TC5 receiving 80% of the
total bandwidth. The actual percentage each class receives depends on
the ratio of its share value to the sum of all shares.

Example:
DEV=pci/0000:08:00.0

$ devlink port function rate add $DEV/vfs_group tx_share 10Gbit \
  tx_max 50Gbit tc-bw 0:20 1:0 2:0 3:0 4:0 5:80 6:0 7:0

$ devlink port function rate set $DEV/vfs_group \
  tc-bw 0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0

Example usage with ynl:

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-set --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1,
  "rate-tc-bws": [
    {"rate-tc-index": 0, "rate-tc-bw": 50},
    {"rate-tc-index": 1, "rate-tc-bw": 50},
    {"rate-tc-index": 2, "rate-tc-bw": 0},
    {"rate-tc-index": 3, "rate-tc-bw": 0},
    {"rate-tc-index": 4, "rate-tc-bw": 0},
    {"rate-tc-index": 5, "rate-tc-bw": 0},
    {"rate-tc-index": 6, "rate-tc-bw": 0},
    {"rate-tc-index": 7, "rate-tc-bw": 0}
  ]
}'

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-get --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1
}'

output for rate-get:
{'bus-name': 'pci',
 'dev-name': '0000:08:00.0',
 'port-index': 1,
 'rate-tc-bws': [{'rate-tc-bw': 50, 'rate-tc-index': 0},
                 {'rate-tc-bw': 50, 'rate-tc-index': 1},
                 {'rate-tc-bw': 0, 'rate-tc-index': 2},
                 {'rate-tc-bw': 0, 'rate-tc-index': 3},
                 {'rate-tc-bw': 0, 'rate-tc-index': 4},
                 {'rate-tc-bw': 0, 'rate-tc-index': 5},
                 {'rate-tc-bw': 0, 'rate-tc-index': 6},
                 {'rate-tc-bw': 0, 'rate-tc-index': 7}],
 'rate-tx-max': 0,
 'rate-tx-priority': 0,
 'rate-tx-share': 0,
 'rate-tx-weight': 0,
 'rate-type': 'leaf'}

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250629142138.361537-3-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/devlink')
-rw-r--r-- net/devlink/netlink_gen.c 15
-rw-r--r-- net/devlink/netlink_gen.h 1
-rw-r--r-- net/devlink/rate.c 127
3 files changed, 139 insertions, 4 deletions
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index e340d955cf3b..c50436433c18 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -45,6 +45,11 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_
[DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
};
+const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = { /* policy for the attributes inside one DEVLINK_ATTR_RATE_TC_BWS nest */
+ [DEVLINK_ATTR_RATE_TC_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX), /* u8 traffic class index, capped at DEVLINK_RATE_TC_INDEX_MAX */
+ [DEVLINK_ATTR_RATE_TC_BW] = { .type = NLA_U32, }, /* u32 bandwidth value for that traffic class */
+};
+
const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
};
@@ -523,7 +528,7 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
};
/* DEVLINK_CMD_RATE_SET - do */
-static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -532,10 +537,11 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};
/* DEVLINK_CMD_RATE_NEW - do */
-static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -544,6 +550,7 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};
/* DEVLINK_CMD_RATE_DEL - do */
@@ -1191,7 +1198,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
.doit = devlink_nl_rate_set_doit,
.post_doit = devlink_nl_post_doit,
.policy = devlink_rate_set_nl_policy,
- .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .maxattr = DEVLINK_ATTR_RATE_TC_BWS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
@@ -1201,7 +1208,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
.doit = devlink_nl_rate_new_doit,
.post_doit = devlink_nl_post_doit,
.policy = devlink_rate_new_nl_policy,
- .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .maxattr = DEVLINK_ATTR_RATE_TC_BWS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 8f2bd50ddf5e..fb733b5d4ff1 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -13,6 +13,7 @@
/* Common nested types */
extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
+extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1];
extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
/* Ops table for devlink */
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 8828ffaf6cbc..d39300a9b3d4 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -80,6 +80,29 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
return ERR_PTR(-EINVAL);
}
+/*
+ * Append the per-traffic-class bandwidth table to @msg.
+ *
+ * One DEVLINK_ATTR_RATE_TC_BWS nest is emitted for each of the
+ * DEVLINK_RATE_TCS_MAX entries of @tc_bw; every nest carries the class
+ * index (DEVLINK_ATTR_RATE_TC_INDEX, u8) and the value stored for it
+ * (DEVLINK_ATTR_RATE_TC_BW, u32).
+ *
+ * Returns 0 on success, or -EMSGSIZE when a nest cannot be opened or one
+ * of its attributes cannot be added; in the latter case the partially
+ * filled nest is cancelled before returning.
+ */
+static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw)
+{
+	int tc = 0;
+
+	while (tc < DEVLINK_RATE_TCS_MAX) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(msg, DEVLINK_ATTR_RATE_TC_BWS);
+		if (!nest)
+			return -EMSGSIZE;
+
+		if (nla_put_u8(msg, DEVLINK_ATTR_RATE_TC_INDEX, tc) ||
+		    nla_put_u32(msg, DEVLINK_ATTR_RATE_TC_BW, tc_bw[tc])) {
+			/* Drop the half-written nest for this class. */
+			nla_nest_cancel(msg, nest);
+			return -EMSGSIZE;
+		}
+
+		nla_nest_end(msg, nest);
+		tc++;
+	}
+
+	return 0;
+}
+
static int devlink_nl_rate_fill(struct sk_buff *msg,
struct devlink_rate *devlink_rate,
enum devlink_command cmd, u32 portid, u32 seq,
@@ -129,6 +152,9 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
devlink_rate->parent->name))
goto nla_put_failure;
+ if (devlink_rate_put_tc_bws(msg, devlink_rate->tc_bw))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -316,6 +342,87 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
return 0;
}
+/*
+ * Parse a single DEVLINK_ATTR_RATE_TC_BWS nest.
+ *
+ * @parent_nest: the nested attribute to parse
+ * @tc_bw:       output table; the entry selected by the nest's
+ *               DEVLINK_ATTR_RATE_TC_INDEX receives its
+ *               DEVLINK_ATTR_RATE_TC_BW value
+ * @bitmap:      tracks which indices have already been supplied
+ * @extack:      extended ack used to report what went wrong
+ *
+ * Both the index and the bandwidth attribute are mandatory, and an index
+ * may appear only once across all calls sharing the same @bitmap.
+ *
+ * Returns 0 on success, the nla_parse_nested() error if the nest does not
+ * satisfy devlink_dl_rate_tc_bws_nl_policy, or -EINVAL for a missing
+ * attribute or a duplicate index.
+ */
+static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw,
+				       unsigned long *bitmap,
+				       struct netlink_ext_ack *extack)
+{
+	/*
+	 * devlink_dl_rate_tc_bws_nl_policy has DEVLINK_ATTR_RATE_TC_BW + 1
+	 * entries, so that is the largest attribute type the parser may be
+	 * allowed to look up. Using DEVLINK_ATTR_MAX here would let the
+	 * policy be indexed past its end as soon as DEVLINK_ATTR_MAX grows
+	 * beyond DEVLINK_ATTR_RATE_TC_BW.
+	 */
+	struct nlattr *tb[DEVLINK_ATTR_RATE_TC_BW + 1];
+	u8 tc_index;
+	int err;
+
+	err = nla_parse_nested(tb, DEVLINK_ATTR_RATE_TC_BW, parent_nest,
+			       devlink_dl_rate_tc_bws_nl_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) {
+		NL_SET_ERR_ATTR_MISS(extack, parent_nest,
+				     DEVLINK_ATTR_RATE_TC_INDEX);
+		return -EINVAL;
+	}
+
+	/* The policy caps the index at DEVLINK_RATE_TC_INDEX_MAX. */
+	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
+
+	if (!tb[DEVLINK_ATTR_RATE_TC_BW]) {
+		NL_SET_ERR_ATTR_MISS(extack, parent_nest,
+				     DEVLINK_ATTR_RATE_TC_BW);
+		return -EINVAL;
+	}
+
+	/* Reject a second entry for a class that was already provided. */
+	if (test_and_set_bit(tc_index, bitmap)) {
+		NL_SET_ERR_MSG_FMT(extack,
+				   "Duplicate traffic class index specified (%u)",
+				   tc_index);
+		return -EINVAL;
+	}
+
+	tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_ATTR_RATE_TC_BW]);
+
+	return 0;
+}
+
+/*
+ * Apply the traffic class bandwidth table carried by a rate request.
+ *
+ * Every DEVLINK_ATTR_RATE_TC_BWS attribute found in the message is parsed
+ * into a local table. The request is rejected with -EINVAL unless all
+ * DEVLINK_RATE_TCS_MAX classes were given a value. The complete table is
+ * then handed to the driver's leaf or node callback, depending on the
+ * type of @devlink_rate, and only after the callback succeeds is it
+ * copied into devlink_rate->tc_bw.
+ *
+ * Returns 0 on success or a negative errno from parsing, validation or
+ * the driver callback.
+ */
+static int devlink_nl_rate_tc_bw_set(struct devlink_rate *devlink_rate,
+				     struct genl_info *info)
+{
+	const struct devlink_ops *ops = devlink_rate->devlink->ops;
+	DECLARE_BITMAP(seen, DEVLINK_RATE_TCS_MAX) = {};
+	u32 shares[DEVLINK_RATE_TCS_MAX] = {};
+	struct nlattr *nest;
+	int ret = -EOPNOTSUPP;
+	int remaining;
+	int tc;
+
+	/* Collect one value per traffic class from the repeated nests. */
+	nlmsg_for_each_attr_type(nest, DEVLINK_ATTR_RATE_TC_BWS, info->nlhdr,
+				 GENL_HDRLEN, remaining) {
+		ret = devlink_nl_rate_tc_bw_parse(nest, shares, seen,
+						  info->extack);
+		if (ret)
+			return ret;
+	}
+
+	/* A partial table is not accepted: every class must be present. */
+	for (tc = 0; tc < DEVLINK_RATE_TCS_MAX; tc++) {
+		if (test_bit(tc, seen))
+			continue;
+
+		NL_SET_ERR_MSG_FMT(info->extack,
+				   "Bandwidth values must be specified for all %u traffic classes",
+				   DEVLINK_RATE_TCS_MAX);
+		return -EINVAL;
+	}
+
+	if (devlink_rate_is_leaf(devlink_rate)) {
+		ret = ops->rate_leaf_tc_bw_set(devlink_rate,
+					       devlink_rate->priv, shares,
+					       info->extack);
+	} else if (devlink_rate_is_node(devlink_rate)) {
+		ret = ops->rate_node_tc_bw_set(devlink_rate,
+					       devlink_rate->priv, shares,
+					       info->extack);
+	}
+
+	if (ret)
+		return ret;
+
+	/* Cache the accepted values so they can be reported back later. */
+	memcpy(devlink_rate->tc_bw, shares, sizeof(shares));
+
+	return 0;
+}
+
static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
const struct devlink_ops *ops,
struct genl_info *info)
@@ -388,6 +495,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
return err;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS]) {
+ err = devlink_nl_rate_tc_bw_set(devlink_rate, info);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -423,6 +536,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
"TX weight set isn't supported for the leafs");
return false;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
+ !ops->rate_leaf_tc_bw_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TC_BWS],
+ "TC bandwidth set isn't supported for the leafs");
+ return false;
+ }
} else if (type == DEVLINK_RATE_TYPE_NODE) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
@@ -449,6 +569,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
"TX weight set isn't supported for the nodes");
return false;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
+ !ops->rate_node_tc_bw_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TC_BWS],
+ "TC bandwidth set isn't supported for the nodes");
+ return false;
+ }
} else {
WARN(1, "Unknown type of rate object");
return false;