From b1e64e30fce86e61d3b09f9352b262622f3f0cda Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:23 +0100 Subject: docs: kdoc: don't add broken comments inside prototypes Parsing a file like drivers/scsi/isci/host.h, which contains broken kernel-doc markups makes it create a prototype that contains unmatched end comments. That causes, for instance, struct sci_power_control to be shown this this prototype: struct sci_power_control { * it is not. */ bool timer_started; */ struct sci_timer timer; * requesters field. */ u8 phys_waiting; */ u8 phys_granted_power; * mapped into requesters via struct sci_phy.phy_index */ struct isci_phy *requesters[SCI_MAX_PHYS]; }; as comments won't start with "/*" anymore. Fix the logic to detect such cases, and keep adding the comments inside it. Signed-off-by: Mauro Carvalho Chehab Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mchehab+huawei@kernel.org> Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <12ac4a97e2bd5a19d6537122c10098690c38d2c7.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index edf70ba139a5..086579d00b5c 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1355,6 +1355,12 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line) # else ... ?? def process_inline_text(self, ln, line): -- cgit v1.2.3 From d5265f7af2d284d5421b763f268157b5fa72f806 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:24 +0100 Subject: docs: kdoc: properly handle empty enum arguments Depending on how the enum proto is written, a comma at the end may incorrectly make kernel-doc parse an arg like " ". Strip spaces before checking if arg is empty. Signed-off-by: Mauro Carvalho Chehab Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet Message-ID: <640784283d52c5fc52ea597344ecd567e2fb6e22.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 086579d00b5c..4b3c555e6c8e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -810,9 +810,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed -- cgit v1.2.3 From cd77a9aa20ef53a03e5bb2630a5e7b16b910f198 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:26 +0100 Subject: docs: kdoc: use tokenizer to handle comments on structs Better handle comments inside structs. After those changes, all unittests now pass: test_private: TestPublicPrivate: test balanced_inner_private: OK test balanced_non_greddy_private: OK test balanced_private: OK test no private: OK test unbalanced_inner_private: OK test unbalanced_private: OK test unbalanced_struct_group_tagged_with_private: OK test unbalanced_two_struct_group_tagged_first_with_private: OK test unbalanced_without_end_of_line: OK Ran 9 tests This also solves a bug when handling STRUCT_GROUP() with a private comment on it: @@ -397134,7 +397134,7 @@ basic V4L2 device-level support. unsigned int max_len; unsigned int offset; struct page_pool_params_slow slow; - STRUCT_GROUP( struct net_device *netdev; + struct net_device *netdev; unsigned int queue_idx; unsigned int flags; }; Signed-off-by: Mauro Carvalho Chehab Message-ID: Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <054763260f7b5459ad0738ed906d7c358d640692.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4b3c555e6c8e..62d8030cf532 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,6 +13,7 @@ import sys import re from pprint import pformat +from kdoc.c_lex import CTokenizer from kdoc.kdoc_re import NestedMatch, KernRe from kdoc.kdoc_item import KdocItem @@ -84,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ -- cgit v1.2.3 From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:34 +0100 Subject: docs: kdoc: replace NestedMatch with CMatch Our previous approach to solve nested structs were to use NestedMatch. It works well, but adding support to parse delimiters is very complex. Instead, use CMatch, which uses a C tokenizer, making the code more reliable and simpler. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 62d8030cf532..efd58c88ff31 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -14,7 +14,7 @@ import re from pprint import pformat from kdoc.c_lex import CTokenizer -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index c07cbe1e6349..7fa7f52cec7b 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,8 @@ import re -from kdoc.kdoc_re import KernRe, NestedMatch +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch struct_args_pattern = r'([^,)]+)' @@ -60,7 +61,7 @@ class CTransforms: # # As it doesn't properly match the end parenthesis on some cases. # - # So, a better solution was crafted: there's now a NestedMatch + # So, a better solution was crafted: there's now a CMatch # class that ensures that delimiters after a search are properly # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. @@ -72,9 +73,9 @@ class CTransforms: # # Replace macros # - # TODO: use NestedMatch for FOO($1, $2, ...) matches + # TODO: use CMatch for FOO($1, $2, ...) matches # - # it is better to also move those to the NestedMatch logic, + # it is better to also move those to the CMatch logic, # to ensure that parentheses will be properly matched. # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), @@ -95,17 +96,17 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), + (CMatch(r"__cond_acquires"), ""), + (CMatch(r"__cond_releases"), ""), + (CMatch(r"__acquires"), ""), + (CMatch(r"__releases"), ""), + (CMatch(r"__must_hold"), ""), + (CMatch(r"__must_not_hold"), ""), + (CMatch(r"__must_hold_shared"), ""), + (CMatch(r"__cond_acquires_shared"), ""), + (CMatch(r"__acquires_shared"), ""), + (CMatch(r"__releases_shared"), ""), + (CMatch(r"STRUCT_GROUP"), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From 024e200e2a89d71dceff7d1bff8ae77b145726e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:38 +0100 Subject: docs: c_lex: setup a logger to report tokenizer issues Report file that has issues detected via CMatch and CTokenizer. This is done by setting up a logger that will be overriden by kdoc_parser, when used on it. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <903ad83ae176196a50444e66177a4f5bcdef5199.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 16 ++++++++++++++++ tools/lib/python/kdoc/kdoc_parser.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index 20e50ff0ecd5..b6d58bd470a9 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -22,6 +22,22 @@ from .kdoc_re import KernRe log = logging.getLogger(__name__) +def tokenizer_set_log(logger, prefix = ""): + """ + Replace the module‑level logger with a LoggerAdapter that + prepends *prefix* to every message. + """ + global log + + class PrefixAdapter(logging.LoggerAdapter): + """ + Ancillary class to set prefix on all message logs. + """ + def process(self, msg, kwargs): + return f"{prefix}{msg}", kwargs + + # Wrap the provided logger in our adapter + log = PrefixAdapter(logger, {"prefix": prefix}) class CToken(): """ diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index efd58c88ff31..f90c6dd0343d 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,7 @@ import sys import re from pprint import pformat -from kdoc.c_lex import CTokenizer +from kdoc.c_lex import CTokenizer, tokenizer_set_log from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem @@ -253,6 +253,8 @@ class KernelDoc: self.config = config self.xforms = xforms + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") + # Initial state for the state machines self.state = state.NORMAL -- cgit v1.2.3 From 12aa7753ff4c5fea405d139bcf67f49bda2c932e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:40 +0100 Subject: docs: kdoc: ensure that comments are dropped before calling split_struct_proto() Changeset 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") revealed a hidden bug at split_struct_proto(): some comments there may break its capability of properly identifying a struct. Fixing it is as simple as stripping comments before calling it. Fixes: 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f90c6dd0343d..8b2c9d0f0c58 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -723,6 +723,7 @@ class KernelDoc: # # Do the basic parse to get the pieces of the declaration. # + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -763,6 +764,7 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. @@ -770,7 +772,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -778,7 +780,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. # -- cgit v1.2.3 From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:41 +0100 Subject: docs: kdoc_parser: avoid tokenizing structs everytime Most of the rules inside CTransforms are of the type CMatch. Don't re-parse the source code every time. Doing this doesn't change the output, but makes kdoc almost as fast as before the tokenizer patches: # Before tokenizer patches $ time ./scripts/kernel-doc . -man >original 2>&1 real 0m42.933s user 0m36.523s sys 0m1.145s # After tokenizer patches $ time ./scripts/kernel-doc . -man >before 2>&1 real 1m29.853s user 1m23.974s sys 0m1.237s # After this patch $ time ./scripts/kernel-doc . -man >after 2>&1 real 0m48.579s user 0m45.938s sys 0m0.988s $ diff -s before after Files before and after are identical Manually checked the differences between original and after with: $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less They're due: - whitespace fixes; - struct_group are now better handled; - several badly-generated man pages from broken inline kernel-doc markups are now fixed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 - tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_parser.py') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 8b2c9d0f0c58..f6c4ee3b18c9 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -737,7 +737,6 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) members = self.xforms.apply("struct", members) # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 2056572852fd..5a62d4a450cb 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -5,7 +5,7 @@ import re from kdoc.kdoc_re import KernRe -from kdoc.c_lex import CMatch +from kdoc.c_lex import CMatch, CTokenizer struct_args_pattern = r'([^,)]+)' @@ -16,6 +16,12 @@ class CTransforms: into something we can parse and generate kdoc for. """ + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + #: Transforms for structs and unions. struct_xforms = [ # Strip attributes @@ -124,13 +130,25 @@ class CTransforms: "var": var_xforms, } - def apply(self, xforms_type, text): + def apply(self, xforms_type, source): """ - Apply a set of transforms to a block of text. + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. """ if xforms_type not in self.xforms: - return text + return source + + if isinstance(source, str): + source = CTokenizer(source) for search, subst in self.xforms[xforms_type]: - text = search.sub(subst, text) - return text + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) -- cgit v1.2.3