xref: /linux/tools/lib/python/kdoc/kdoc_parser.py (revision 26a4cfaff82a2dcb810f6bfd5f4842f9b6046c8a)
1d966dc65SMauro Carvalho Chehab#!/usr/bin/env python3
2d966dc65SMauro Carvalho Chehab# SPDX-License-Identifier: GPL-2.0
3d966dc65SMauro Carvalho Chehab# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4d966dc65SMauro Carvalho Chehab#
5d966dc65SMauro Carvalho Chehab# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6d966dc65SMauro Carvalho Chehab
7d966dc65SMauro Carvalho Chehab"""
850206750SMauro Carvalho ChehabClasses and functions related to reading a C language source or header FILE
950206750SMauro Carvalho Chehaband extract embedded documentation comments from it.
10d966dc65SMauro Carvalho Chehab"""
11d966dc65SMauro Carvalho Chehab
1240020fe8SJonathan Corbetimport sys
13d966dc65SMauro Carvalho Chehabimport re
14d966dc65SMauro Carvalho Chehabfrom pprint import pformat
15d966dc65SMauro Carvalho Chehab
16992a9df4SJonathan Corbetfrom kdoc.kdoc_re import NestedMatch, KernRe
17992a9df4SJonathan Corbetfrom kdoc.kdoc_item import KdocItem
18d966dc65SMauro Carvalho Chehab
19d966dc65SMauro Carvalho Chehab#
20d966dc65SMauro Carvalho Chehab# Regular expressions used to parse kernel-doc markups at KernelDoc class.
21d966dc65SMauro Carvalho Chehab#
225f88f44dSRandy Dunlap# Let's declare them in lowercase outside any class to make it easier to
235f88f44dSRandy Dunlap# convert from the Perl script.
24d966dc65SMauro Carvalho Chehab#
25d966dc65SMauro Carvalho Chehab# As those are evaluated at the beginning, no need to cache them
26d966dc65SMauro Carvalho Chehab#
27d966dc65SMauro Carvalho Chehab
28d966dc65SMauro Carvalho Chehab# Allow whitespace at end of comment start.
2904a383ceSMauro Carvalho Chehabdoc_start = KernRe(r'^/\*\*\s*$', cache=False)
30d966dc65SMauro Carvalho Chehab
3104a383ceSMauro Carvalho Chehabdoc_end = KernRe(r'\*/', cache=False)
3204a383ceSMauro Carvalho Chehabdoc_com = KernRe(r'\s*\*\s*', cache=False)
3304a383ceSMauro Carvalho Chehabdoc_com_body = KernRe(r'\s*\* ?', cache=False)
3404a383ceSMauro Carvalho Chehabdoc_decl = doc_com + KernRe(r'(\w+)', cache=False)
35d966dc65SMauro Carvalho Chehab
36d966dc65SMauro Carvalho Chehab# @params and a strictly limited set of supported section names
37d966dc65SMauro Carvalho Chehab# Specifically:
38d966dc65SMauro Carvalho Chehab#   Match @word:
39d966dc65SMauro Carvalho Chehab#         @...:
40d966dc65SMauro Carvalho Chehab#         @{section-name}:
41d966dc65SMauro Carvalho Chehab# while trying to not match literal block starts like "example::"
42d966dc65SMauro Carvalho Chehab#
43636d4d9eSJonathan Corbetknown_section_names = 'description|context|returns?|notes?|examples?'
44636d4d9eSJonathan Corbetknown_sections = KernRe(known_section_names, flags = re.I)
45d966dc65SMauro Carvalho Chehabdoc_sect = doc_com + \
465fd513f0SJonathan Corbet    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
47d966dc65SMauro Carvalho Chehab           flags=re.I, cache=False)
48d966dc65SMauro Carvalho Chehab
4904a383ceSMauro Carvalho Chehabdoc_content = doc_com_body + KernRe(r'(.*)', cache=False)
5004a383ceSMauro Carvalho Chehabdoc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
5104a383ceSMauro Carvalho Chehabdoc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
5204a383ceSMauro Carvalho Chehabdoc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
539dbbd32eSSteven Pricedoc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)
54d966dc65SMauro Carvalho Chehab
5504a383ceSMauro Carvalho Chehabexport_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
5604a383ceSMauro Carvalho Chehabexport_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)
57d966dc65SMauro Carvalho Chehab
585fd513f0SJonathan Corbettype_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
59d966dc65SMauro Carvalho Chehab
60f9b4cf2eSJonathan Corbet#
610682bde2SJonathan Corbet# Tests for the beginning of a kerneldoc block in its various forms.
620682bde2SJonathan Corbet#
630682bde2SJonathan Corbetdoc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
641045ec38SMauro Carvalho Chehabdoc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
650682bde2SJonathan Corbetdoc_begin_func = KernRe(str(doc_com) +			# initial " * '
660682bde2SJonathan Corbet                        r"(?:\w+\s*\*\s*)?" + 		# type (not captured)
670682bde2SJonathan Corbet                        r'(?:define\s+)?' + 		# possible "define" (not captured)
680682bde2SJonathan Corbet                        r'(\w+)\s*(?:\(\w*\))?\s*' +	# name and optional "(...)"
690682bde2SJonathan Corbet                        r'(?:[-:].*)?$',		# description (not captured)
700682bde2SJonathan Corbet                        cache = False)
710682bde2SJonathan Corbet
720682bde2SJonathan Corbet#
7364cf83bcSJonathan Corbet# Here begins a long set of transformations to turn structure member prefixes
7464cf83bcSJonathan Corbet# and macro invocations into something we can parse and generate kdoc for.
7564cf83bcSJonathan Corbet#
7664cf83bcSJonathan Corbetstruct_args_pattern = r'([^,)]+)'
7764cf83bcSJonathan Corbet
78a2752f8cSJonathan Corbetstruct_xforms = [
7964cf83bcSJonathan Corbet    # Strip attributes
80f853e830SJonathan Corbet    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
8164cf83bcSJonathan Corbet    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
8264cf83bcSJonathan Corbet    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
8364cf83bcSJonathan Corbet    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
8464cf83bcSJonathan Corbet    (KernRe(r'\s*__packed\s*', re.S), ' '),
8564cf83bcSJonathan Corbet    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
862006f468SRandy Dunlap    (KernRe(r'\s*__private', re.S), ' '),
872006f468SRandy Dunlap    (KernRe(r'\s*__rcu', re.S), ' '),
8864cf83bcSJonathan Corbet    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
8964cf83bcSJonathan Corbet    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
908b00d6feSJonathan Corbet    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
9164cf83bcSJonathan Corbet    #
9264cf83bcSJonathan Corbet    # Unwrap struct_group macros based on this definition:
9364cf83bcSJonathan Corbet    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
9464cf83bcSJonathan Corbet    # which has variants like: struct_group(NAME, MEMBERS...)
9564cf83bcSJonathan Corbet    # Only MEMBERS arguments require documentation.
9664cf83bcSJonathan Corbet    #
9764cf83bcSJonathan Corbet    # Parsing them happens on two steps:
9864cf83bcSJonathan Corbet    #
9964cf83bcSJonathan Corbet    # 1. drop struct group arguments that aren't at MEMBERS,
10064cf83bcSJonathan Corbet    #    storing them as STRUCT_GROUP(MEMBERS)
10164cf83bcSJonathan Corbet    #
10264cf83bcSJonathan Corbet    # 2. remove STRUCT_GROUP() ancillary macro.
10364cf83bcSJonathan Corbet    #
10464cf83bcSJonathan Corbet    # The original logic used to remove STRUCT_GROUP() using an
10564cf83bcSJonathan Corbet    # advanced regex:
10664cf83bcSJonathan Corbet    #
10764cf83bcSJonathan Corbet    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
10864cf83bcSJonathan Corbet    #
10964cf83bcSJonathan Corbet    # with two patterns that are incompatible with
11064cf83bcSJonathan Corbet    # Python re module, as it has:
11164cf83bcSJonathan Corbet    #
11264cf83bcSJonathan Corbet    #   - a recursive pattern: (?1)
11364cf83bcSJonathan Corbet    #   - an atomic grouping: (?>...)
11464cf83bcSJonathan Corbet    #
11564cf83bcSJonathan Corbet    # I tried a simpler version: but it didn't work either:
11664cf83bcSJonathan Corbet    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
11764cf83bcSJonathan Corbet    #
11864cf83bcSJonathan Corbet    # As it doesn't properly match the end parenthesis on some cases.
11964cf83bcSJonathan Corbet    #
12064cf83bcSJonathan Corbet    # So, a better solution was crafted: there's now a NestedMatch
12164cf83bcSJonathan Corbet    # class that ensures that delimiters after a search are properly
12264cf83bcSJonathan Corbet    # matched. So, the implementation to drop STRUCT_GROUP() will be
12364cf83bcSJonathan Corbet    # handled in separate.
12464cf83bcSJonathan Corbet    #
12564cf83bcSJonathan Corbet    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
12664cf83bcSJonathan Corbet    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
12764cf83bcSJonathan Corbet    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
12864cf83bcSJonathan Corbet    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
12964cf83bcSJonathan Corbet    #
13064cf83bcSJonathan Corbet    # Replace macros
13164cf83bcSJonathan Corbet    #
13264cf83bcSJonathan Corbet    # TODO: use NestedMatch for FOO($1, $2, ...) matches
13364cf83bcSJonathan Corbet    #
13464cf83bcSJonathan Corbet    # it is better to also move those to the NestedMatch logic,
1355f88f44dSRandy Dunlap    # to ensure that parentheses will be properly matched.
13664cf83bcSJonathan Corbet    #
13764cf83bcSJonathan Corbet    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
13864cf83bcSJonathan Corbet     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
13964cf83bcSJonathan Corbet    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
14064cf83bcSJonathan Corbet     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
14164cf83bcSJonathan Corbet    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
14264cf83bcSJonathan Corbet            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
14364cf83bcSJonathan Corbet    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
14464cf83bcSJonathan Corbet            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
14564cf83bcSJonathan Corbet    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
14664cf83bcSJonathan Corbet            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
14764cf83bcSJonathan Corbet    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
14864cf83bcSJonathan Corbet            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
14964cf83bcSJonathan Corbet    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
15064cf83bcSJonathan Corbet            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
15164cf83bcSJonathan Corbet    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
15264cf83bcSJonathan Corbet    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
15364cf83bcSJonathan Corbet]
15464cf83bcSJonathan Corbet#
1555f88f44dSRandy Dunlap# Regexes here are guaranteed to have the end delimiter matching
15664cf83bcSJonathan Corbet# the start delimiter. Yet, right now, only one replace group
15764cf83bcSJonathan Corbet# is allowed.
15864cf83bcSJonathan Corbet#
15964cf83bcSJonathan Corbetstruct_nested_prefixes = [
16064cf83bcSJonathan Corbet    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
16164cf83bcSJonathan Corbet]
16264cf83bcSJonathan Corbet
1634c232a81SJonathan Corbet#
1644c232a81SJonathan Corbet# Transforms for function prototypes
1654c232a81SJonathan Corbet#
1664c232a81SJonathan Corbetfunction_xforms  = [
167a2752f8cSJonathan Corbet    (KernRe(r"^static +"), ""),
168a2752f8cSJonathan Corbet    (KernRe(r"^extern +"), ""),
169a2752f8cSJonathan Corbet    (KernRe(r"^asmlinkage +"), ""),
170a2752f8cSJonathan Corbet    (KernRe(r"^inline +"), ""),
171a2752f8cSJonathan Corbet    (KernRe(r"^__inline__ +"), ""),
172a2752f8cSJonathan Corbet    (KernRe(r"^__inline +"), ""),
173a2752f8cSJonathan Corbet    (KernRe(r"^__always_inline +"), ""),
174a2752f8cSJonathan Corbet    (KernRe(r"^noinline +"), ""),
175a2752f8cSJonathan Corbet    (KernRe(r"^__FORTIFY_INLINE +"), ""),
176a2752f8cSJonathan Corbet    (KernRe(r"__init +"), ""),
177a2752f8cSJonathan Corbet    (KernRe(r"__init_or_module +"), ""),
178*b211a306SRandy Dunlap    (KernRe(r"__exit +"), ""),
179a2752f8cSJonathan Corbet    (KernRe(r"__deprecated +"), ""),
180a2752f8cSJonathan Corbet    (KernRe(r"__flatten +"), ""),
181a2752f8cSJonathan Corbet    (KernRe(r"__meminit +"), ""),
182a2752f8cSJonathan Corbet    (KernRe(r"__must_check +"), ""),
183a2752f8cSJonathan Corbet    (KernRe(r"__weak +"), ""),
184a2752f8cSJonathan Corbet    (KernRe(r"__sched +"), ""),
185a2752f8cSJonathan Corbet    (KernRe(r"_noprof"), ""),
1862006f468SRandy Dunlap    (KernRe(r"__always_unused *"), ""),
187a2752f8cSJonathan Corbet    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
188a2752f8cSJonathan Corbet    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
189a2752f8cSJonathan Corbet    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
190a2752f8cSJonathan Corbet    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
191a2752f8cSJonathan Corbet    (KernRe(r"__attribute_const__ +"), ""),
192a2752f8cSJonathan Corbet    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
1934c232a81SJonathan Corbet]
1944c232a81SJonathan Corbet
195a2752f8cSJonathan Corbet#
19650206750SMauro Carvalho Chehab# Ancillary functions
197a2752f8cSJonathan Corbet#
19850206750SMauro Carvalho Chehab
199a2752f8cSJonathan Corbetdef apply_transforms(xforms, text):
20050206750SMauro Carvalho Chehab    """
20150206750SMauro Carvalho Chehab    Apply a set of transforms to a block of text.
20250206750SMauro Carvalho Chehab    """
203a2752f8cSJonathan Corbet    for search, subst in xforms:
204a2752f8cSJonathan Corbet        text = search.sub(subst, text)
205a2752f8cSJonathan Corbet    return text
20664cf83bcSJonathan Corbet
207f9b4cf2eSJonathan Corbetmulti_space = KernRe(r'\s\s+')
208f9b4cf2eSJonathan Corbetdef trim_whitespace(s):
20950206750SMauro Carvalho Chehab    """
21050206750SMauro Carvalho Chehab    A little helper to get rid of excess white space.
21150206750SMauro Carvalho Chehab    """
212f9b4cf2eSJonathan Corbet    return multi_space.sub(' ', s.strip())
213f9b4cf2eSJonathan Corbet
2146656ae4dSJonathan Corbetdef trim_private_members(text):
21550206750SMauro Carvalho Chehab    """
21650206750SMauro Carvalho Chehab    Remove ``struct``/``enum`` members that have been marked "private".
21750206750SMauro Carvalho Chehab    """
2186656ae4dSJonathan Corbet    # First look for a "public:" block that ends a private region, then
2196656ae4dSJonathan Corbet    # handle the "private until the end" case.
2206656ae4dSJonathan Corbet    #
2216656ae4dSJonathan Corbet    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
2226656ae4dSJonathan Corbet    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
2236656ae4dSJonathan Corbet    #
2246656ae4dSJonathan Corbet    # We needed the comments to do the above, but now we can take them out.
2256656ae4dSJonathan Corbet    #
2266656ae4dSJonathan Corbet    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
2276656ae4dSJonathan Corbet
228f9cdbc57SMauro Carvalho Chehabclass state:
229d966dc65SMauro Carvalho Chehab    """
23050206750SMauro Carvalho Chehab    States used by the parser's state machine.
231d966dc65SMauro Carvalho Chehab    """
232d966dc65SMauro Carvalho Chehab
233d966dc65SMauro Carvalho Chehab    # Parser states
23450206750SMauro Carvalho Chehab    NORMAL        = 0        #: Normal code.
23550206750SMauro Carvalho Chehab    NAME          = 1        #: Looking for function name.
23650206750SMauro Carvalho Chehab    DECLARATION   = 2        #: We have seen a declaration which might not be done.
23750206750SMauro Carvalho Chehab    BODY          = 3        #: The body of the comment.
23850206750SMauro Carvalho Chehab    SPECIAL_SECTION = 4      #: Doc section ending with a blank line.
23950206750SMauro Carvalho Chehab    PROTO         = 5        #: Scanning prototype.
24050206750SMauro Carvalho Chehab    DOCBLOCK      = 6        #: Documentation block.
24150206750SMauro Carvalho Chehab    INLINE_NAME   = 7        #: Gathering doc outside main block.
24250206750SMauro Carvalho Chehab    INLINE_TEXT   = 8	     #: Reading the body of inline docs.
243d966dc65SMauro Carvalho Chehab
24450206750SMauro Carvalho Chehab    #: Names for each parser state.
245f9cdbc57SMauro Carvalho Chehab    name = [
246d966dc65SMauro Carvalho Chehab        "NORMAL",
247d966dc65SMauro Carvalho Chehab        "NAME",
248e4153a22SJonathan Corbet        "DECLARATION",
249d966dc65SMauro Carvalho Chehab        "BODY",
25074cee0dfSJonathan Corbet        "SPECIAL_SECTION",
251d966dc65SMauro Carvalho Chehab        "PROTO",
252d966dc65SMauro Carvalho Chehab        "DOCBLOCK",
253096f73abSJonathan Corbet        "INLINE_NAME",
254096f73abSJonathan Corbet        "INLINE_TEXT",
255d966dc65SMauro Carvalho Chehab    ]
256d966dc65SMauro Carvalho Chehab
257d966dc65SMauro Carvalho Chehab
25850206750SMauro Carvalho ChehabSECTION_DEFAULT = "Description"  #: Default section.
259e3b42e94SMauro Carvalho Chehab
260e3b42e94SMauro Carvalho Chehabclass KernelEntry:
26150206750SMauro Carvalho Chehab    """
26250206750SMauro Carvalho Chehab    Encapsulates a Kernel documentation entry.
26350206750SMauro Carvalho Chehab    """
264e3b42e94SMauro Carvalho Chehab
2652bd22194SMauro Carvalho Chehab    def __init__(self, config, fname, ln):
266e3b42e94SMauro Carvalho Chehab        self.config = config
2672bd22194SMauro Carvalho Chehab        self.fname = fname
268e3b42e94SMauro Carvalho Chehab
269d6699d5fSJonathan Corbet        self._contents = []
270e3b42e94SMauro Carvalho Chehab        self.prototype = ""
271e3b42e94SMauro Carvalho Chehab
272e3b42e94SMauro Carvalho Chehab        self.warnings = []
273e3b42e94SMauro Carvalho Chehab
274e3b42e94SMauro Carvalho Chehab        self.parameterlist = []
275e3b42e94SMauro Carvalho Chehab        self.parameterdescs = {}
276e3b42e94SMauro Carvalho Chehab        self.parametertypes = {}
277e3b42e94SMauro Carvalho Chehab        self.parameterdesc_start_lines = {}
278e3b42e94SMauro Carvalho Chehab
279e3b42e94SMauro Carvalho Chehab        self.section_start_lines = {}
280e3b42e94SMauro Carvalho Chehab        self.sections = {}
281e3b42e94SMauro Carvalho Chehab
282e3b42e94SMauro Carvalho Chehab        self.anon_struct_union = False
283e3b42e94SMauro Carvalho Chehab
284e3b42e94SMauro Carvalho Chehab        self.leading_space = None
285e3b42e94SMauro Carvalho Chehab
286e5e7ca66SJacob Keller        self.fname = fname
287e5e7ca66SJacob Keller
288e3b42e94SMauro Carvalho Chehab        # State flags
289e3b42e94SMauro Carvalho Chehab        self.brcount = 0
290e3b42e94SMauro Carvalho Chehab        self.declaration_start_line = ln + 1
291e3b42e94SMauro Carvalho Chehab
292d6699d5fSJonathan Corbet    #
293d6699d5fSJonathan Corbet    # Management of section contents
294d6699d5fSJonathan Corbet    #
295d6699d5fSJonathan Corbet    def add_text(self, text):
29650206750SMauro Carvalho Chehab        """Add a new text to the entry contents list."""
297d6699d5fSJonathan Corbet        self._contents.append(text)
298d6699d5fSJonathan Corbet
299d6699d5fSJonathan Corbet    def contents(self):
30050206750SMauro Carvalho Chehab        """Returns a string with all content texts that were added."""
301d6699d5fSJonathan Corbet        return '\n'.join(self._contents) + '\n'
302d6699d5fSJonathan Corbet
303e3b42e94SMauro Carvalho Chehab    # TODO: rename to emit_message after removal of kernel-doc.pl
304e5e7ca66SJacob Keller    def emit_msg(self, ln, msg, *, warning=True):
305292eca31SMauro Carvalho Chehab        """Emit a message."""
306e3b42e94SMauro Carvalho Chehab
307e5e7ca66SJacob Keller        log_msg = f"{self.fname}:{ln} {msg}"
308e5e7ca66SJacob Keller
309e3b42e94SMauro Carvalho Chehab        if not warning:
310e3b42e94SMauro Carvalho Chehab            self.config.log.info(log_msg)
311e3b42e94SMauro Carvalho Chehab            return
312e3b42e94SMauro Carvalho Chehab
313e3b42e94SMauro Carvalho Chehab        # Delegate warning output to output logic, as this way it
314e3b42e94SMauro Carvalho Chehab        # will report warnings/info only for symbols that are output
315e3b42e94SMauro Carvalho Chehab
316e3b42e94SMauro Carvalho Chehab        self.warnings.append(log_msg)
317e3b42e94SMauro Carvalho Chehab        return
318e3b42e94SMauro Carvalho Chehab
319df275526SJonathan Corbet    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
32050206750SMauro Carvalho Chehab        """
32150206750SMauro Carvalho Chehab        Begin a new section.
32250206750SMauro Carvalho Chehab        """
323df275526SJonathan Corbet        if dump:
324df275526SJonathan Corbet            self.dump_section(start_new = True)
325df275526SJonathan Corbet        self.section = title
326df275526SJonathan Corbet        self.new_start_line = line_no
327df275526SJonathan Corbet
328e3b42e94SMauro Carvalho Chehab    def dump_section(self, start_new=True):
329e3b42e94SMauro Carvalho Chehab        """
330e3b42e94SMauro Carvalho Chehab        Dumps section contents to arrays/hashes intended for that purpose.
331e3b42e94SMauro Carvalho Chehab        """
332d6699d5fSJonathan Corbet        #
333d6699d5fSJonathan Corbet        # If we have accumulated no contents in the default ("description")
334d6699d5fSJonathan Corbet        # section, don't bother.
335d6699d5fSJonathan Corbet        #
336d6699d5fSJonathan Corbet        if self.section == SECTION_DEFAULT and not self._contents:
337d6699d5fSJonathan Corbet            return
338e3b42e94SMauro Carvalho Chehab        name = self.section
339d6699d5fSJonathan Corbet        contents = self.contents()
340e3b42e94SMauro Carvalho Chehab
341e3b42e94SMauro Carvalho Chehab        if type_param.match(name):
342e3b42e94SMauro Carvalho Chehab            name = type_param.group(1)
343e3b42e94SMauro Carvalho Chehab
344e3b42e94SMauro Carvalho Chehab            self.parameterdescs[name] = contents
345e3b42e94SMauro Carvalho Chehab            self.parameterdesc_start_lines[name] = self.new_start_line
346e3b42e94SMauro Carvalho Chehab
347e3b42e94SMauro Carvalho Chehab            self.new_start_line = 0
348e3b42e94SMauro Carvalho Chehab
349e3b42e94SMauro Carvalho Chehab        else:
350e3b42e94SMauro Carvalho Chehab            if name in self.sections and self.sections[name] != "":
351e3b42e94SMauro Carvalho Chehab                # Only warn on user-specified duplicate section names
352e3b42e94SMauro Carvalho Chehab                if name != SECTION_DEFAULT:
353e3b42e94SMauro Carvalho Chehab                    self.emit_msg(self.new_start_line,
354e5e7ca66SJacob Keller                                  f"duplicate section name '{name}'")
355d6699d5fSJonathan Corbet                # Treat as a new paragraph - add a blank line
356d6699d5fSJonathan Corbet                self.sections[name] += '\n' + contents
357e3b42e94SMauro Carvalho Chehab            else:
358e3b42e94SMauro Carvalho Chehab                self.sections[name] = contents
359e3b42e94SMauro Carvalho Chehab                self.section_start_lines[name] = self.new_start_line
360e3b42e94SMauro Carvalho Chehab                self.new_start_line = 0
361e3b42e94SMauro Carvalho Chehab
362e3b42e94SMauro Carvalho Chehab#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
363e3b42e94SMauro Carvalho Chehab
364e3b42e94SMauro Carvalho Chehab        if start_new:
365e3b42e94SMauro Carvalho Chehab            self.section = SECTION_DEFAULT
366d6699d5fSJonathan Corbet            self._contents = []
367e3b42e94SMauro Carvalho Chehab
368ade9b957SMauro Carvalho Chehabpython_warning = False
369f9cdbc57SMauro Carvalho Chehab
370f9cdbc57SMauro Carvalho Chehabclass KernelDoc:
371f9cdbc57SMauro Carvalho Chehab    """
372f9cdbc57SMauro Carvalho Chehab    Read a C language source or header FILE and extract embedded
373f9cdbc57SMauro Carvalho Chehab    documentation comments.
374f9cdbc57SMauro Carvalho Chehab    """
375f9cdbc57SMauro Carvalho Chehab
37650206750SMauro Carvalho Chehab    #: Name of context section.
377d966dc65SMauro Carvalho Chehab    section_context = "Context"
37850206750SMauro Carvalho Chehab
37950206750SMauro Carvalho Chehab    #: Name of return section.
380d966dc65SMauro Carvalho Chehab    section_return = "Return"
381d966dc65SMauro Carvalho Chehab
38250206750SMauro Carvalho Chehab    #: String to write when a parameter is not described.
383d966dc65SMauro Carvalho Chehab    undescribed = "-- undescribed --"
384d966dc65SMauro Carvalho Chehab
385d966dc65SMauro Carvalho Chehab    def __init__(self, config, fname):
386d966dc65SMauro Carvalho Chehab        """Initialize internal variables"""
387d966dc65SMauro Carvalho Chehab
388d966dc65SMauro Carvalho Chehab        self.fname = fname
389d966dc65SMauro Carvalho Chehab        self.config = config
390d966dc65SMauro Carvalho Chehab
391d966dc65SMauro Carvalho Chehab        # Initial state for the state machines
392f9cdbc57SMauro Carvalho Chehab        self.state = state.NORMAL
393d966dc65SMauro Carvalho Chehab
394d966dc65SMauro Carvalho Chehab        # Store entry currently being processed
395d966dc65SMauro Carvalho Chehab        self.entry = None
396d966dc65SMauro Carvalho Chehab
397d966dc65SMauro Carvalho Chehab        # Place all potential outputs into an array
398d966dc65SMauro Carvalho Chehab        self.entries = []
399d966dc65SMauro Carvalho Chehab
40040020fe8SJonathan Corbet        #
40140020fe8SJonathan Corbet        # We need Python 3.7 for its "dicts remember the insertion
40240020fe8SJonathan Corbet        # order" guarantee
40340020fe8SJonathan Corbet        #
404ade9b957SMauro Carvalho Chehab        global python_warning
405ade9b957SMauro Carvalho Chehab        if (not python_warning and
406ade9b957SMauro Carvalho Chehab            sys.version_info.major == 3 and sys.version_info.minor < 7):
407ade9b957SMauro Carvalho Chehab
40840020fe8SJonathan Corbet            self.emit_msg(0,
40940020fe8SJonathan Corbet                          'Python 3.7 or later is required for correct results')
410ade9b957SMauro Carvalho Chehab            python_warning = True
41140020fe8SJonathan Corbet
412e5e7ca66SJacob Keller    def emit_msg(self, ln, msg, *, warning=True):
413d966dc65SMauro Carvalho Chehab        """Emit a message"""
414d966dc65SMauro Carvalho Chehab
4159cbc2d3bSMauro Carvalho Chehab        if self.entry:
416e5e7ca66SJacob Keller            self.entry.emit_msg(ln, msg, warning=warning)
4179cbc2d3bSMauro Carvalho Chehab            return
4189cbc2d3bSMauro Carvalho Chehab
419e5e7ca66SJacob Keller        log_msg = f"{self.fname}:{ln} {msg}"
420e5e7ca66SJacob Keller
421e3b42e94SMauro Carvalho Chehab        if warning:
4229cbc2d3bSMauro Carvalho Chehab            self.config.log.warning(log_msg)
423e3b42e94SMauro Carvalho Chehab        else:
424e3b42e94SMauro Carvalho Chehab            self.config.log.info(log_msg)
425d966dc65SMauro Carvalho Chehab
426d966dc65SMauro Carvalho Chehab    def dump_section(self, start_new=True):
427d966dc65SMauro Carvalho Chehab        """
42850206750SMauro Carvalho Chehab        Dump section contents to arrays/hashes intended for that purpose.
429d966dc65SMauro Carvalho Chehab        """
430d966dc65SMauro Carvalho Chehab
431e3b42e94SMauro Carvalho Chehab        if self.entry:
432e3b42e94SMauro Carvalho Chehab            self.entry.dump_section(start_new)
433d966dc65SMauro Carvalho Chehab
434485f6f79SMauro Carvalho Chehab    # TODO: rename it to store_declaration after removal of kernel-doc.pl
435d966dc65SMauro Carvalho Chehab    def output_declaration(self, dtype, name, **args):
436d966dc65SMauro Carvalho Chehab        """
43750206750SMauro Carvalho Chehab        Store the entry into an entry array.
438d966dc65SMauro Carvalho Chehab
43950206750SMauro Carvalho Chehab        The actual output and output filters will be handled elsewhere.
440d966dc65SMauro Carvalho Chehab        """
441d966dc65SMauro Carvalho Chehab
4422bd22194SMauro Carvalho Chehab        item = KdocItem(name, self.fname, dtype,
4432bd22194SMauro Carvalho Chehab                        self.entry.declaration_start_line, **args)
44460016e01SJonathan Corbet        item.warnings = self.entry.warnings
445d966dc65SMauro Carvalho Chehab
446d966dc65SMauro Carvalho Chehab        # Drop empty sections
447485f6f79SMauro Carvalho Chehab        # TODO: improve empty sections logic to emit warnings
4488d733875SJonathan Corbet        sections = self.entry.sections
449d966dc65SMauro Carvalho Chehab        for section in ["Description", "Return"]:
4508d9d1229SJonathan Corbet            if section in sections and not sections[section].rstrip():
451d966dc65SMauro Carvalho Chehab                del sections[section]
4528d733875SJonathan Corbet        item.set_sections(sections, self.entry.section_start_lines)
453de6f7ac9SJonathan Corbet        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
454de6f7ac9SJonathan Corbet                        self.entry.parametertypes,
455de6f7ac9SJonathan Corbet                        self.entry.parameterdesc_start_lines)
456703f9074SJonathan Corbet        self.entries.append(item)
457d966dc65SMauro Carvalho Chehab
458d966dc65SMauro Carvalho Chehab        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
459d966dc65SMauro Carvalho Chehab
460292eca31SMauro Carvalho Chehab    def emit_unused_warnings(self):
461292eca31SMauro Carvalho Chehab        """
462292eca31SMauro Carvalho Chehab        When the parser fails to produce a valid entry, it places some
463292eca31SMauro Carvalho Chehab        warnings under `entry.warnings` that will be discarded when resetting
464292eca31SMauro Carvalho Chehab        the state.
465292eca31SMauro Carvalho Chehab
466292eca31SMauro Carvalho Chehab        Ensure that those warnings are not lost.
467292eca31SMauro Carvalho Chehab
468292eca31SMauro Carvalho Chehab        .. note::
469292eca31SMauro Carvalho Chehab
470292eca31SMauro Carvalho Chehab              Because we are calling `config.warning()` here, those
471292eca31SMauro Carvalho Chehab              warnings are not filtered by the `-W` parameters: they will all
472292eca31SMauro Carvalho Chehab              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
473292eca31SMauro Carvalho Chehab              `-Wcontents-before-sections` are used.
474292eca31SMauro Carvalho Chehab
475292eca31SMauro Carvalho Chehab              Allowing those warnings to be filtered is complex, because it
476292eca31SMauro Carvalho Chehab              would require storing them in a buffer and then filtering them
477292eca31SMauro Carvalho Chehab              during the output step of the code, depending on the
478292eca31SMauro Carvalho Chehab              selected symbols.
479292eca31SMauro Carvalho Chehab        """
480292eca31SMauro Carvalho Chehab        if self.entry and self.entry not in self.entries:
481292eca31SMauro Carvalho Chehab            for log_msg in self.entry.warnings:
482292eca31SMauro Carvalho Chehab                self.config.warning(log_msg)
483292eca31SMauro Carvalho Chehab
484d966dc65SMauro Carvalho Chehab    def reset_state(self, ln):
485d966dc65SMauro Carvalho Chehab        """
486d966dc65SMauro Carvalho Chehab        Ancillary routine to create a new entry. It initializes all
487d966dc65SMauro Carvalho Chehab        variables used by the state machine.
488d966dc65SMauro Carvalho Chehab        """
489d966dc65SMauro Carvalho Chehab
490292eca31SMauro Carvalho Chehab        self.emit_unused_warnings()
491469c1c9eSAndy Shevchenko
4922bd22194SMauro Carvalho Chehab        self.entry = KernelEntry(self.config, self.fname, ln)
493d966dc65SMauro Carvalho Chehab
494d966dc65SMauro Carvalho Chehab        # State flags
495f9cdbc57SMauro Carvalho Chehab        self.state = state.NORMAL
496d966dc65SMauro Carvalho Chehab
497d966dc65SMauro Carvalho Chehab    def push_parameter(self, ln, decl_type, param, dtype,
498d966dc65SMauro Carvalho Chehab                       org_arg, declaration_name):
499d966dc65SMauro Carvalho Chehab        """
500d966dc65SMauro Carvalho Chehab        Store parameters and their descriptions at self.entry.
501d966dc65SMauro Carvalho Chehab        """
502d966dc65SMauro Carvalho Chehab
503d966dc65SMauro Carvalho Chehab        if self.entry.anon_struct_union and dtype == "" and param == "}":
504d966dc65SMauro Carvalho Chehab            return  # Ignore the ending }; from anonymous struct/union
505d966dc65SMauro Carvalho Chehab
506d966dc65SMauro Carvalho Chehab        self.entry.anon_struct_union = False
507d966dc65SMauro Carvalho Chehab
50804a383ceSMauro Carvalho Chehab        param = KernRe(r'[\[\)].*').sub('', param, count=1)
509d966dc65SMauro Carvalho Chehab
510e214cca3SJonathan Corbet        #
511e214cca3SJonathan Corbet        # Look at various "anonymous type" cases.
512e214cca3SJonathan Corbet        #
513e214cca3SJonathan Corbet        if dtype == '':
514e214cca3SJonathan Corbet            if param.endswith("..."):
515e214cca3SJonathan Corbet                if len(param) > 3: # there is a name provided, use that
516d966dc65SMauro Carvalho Chehab                    param = param[:-3]
517e214cca3SJonathan Corbet                if not self.entry.parameterdescs.get(param):
518d966dc65SMauro Carvalho Chehab                    self.entry.parameterdescs[param] = "variable arguments"
519d966dc65SMauro Carvalho Chehab
520e214cca3SJonathan Corbet            elif (not param) or param == "void":
521d966dc65SMauro Carvalho Chehab                param = "void"
522d966dc65SMauro Carvalho Chehab                self.entry.parameterdescs[param] = "no arguments"
523d966dc65SMauro Carvalho Chehab
524e214cca3SJonathan Corbet            elif param in ["struct", "union"]:
525d966dc65SMauro Carvalho Chehab                # Handle unnamed (anonymous) union or struct
526d966dc65SMauro Carvalho Chehab                dtype = param
527d966dc65SMauro Carvalho Chehab                param = "{unnamed_" + param + "}"
528d966dc65SMauro Carvalho Chehab                self.entry.parameterdescs[param] = "anonymous\n"
529d966dc65SMauro Carvalho Chehab                self.entry.anon_struct_union = True
530d966dc65SMauro Carvalho Chehab
531d966dc65SMauro Carvalho Chehab        # Warn if parameter has no description
532d966dc65SMauro Carvalho Chehab        # (but ignore ones starting with # as these are not parameters
533d966dc65SMauro Carvalho Chehab        # but inline preprocessor statements)
534d966dc65SMauro Carvalho Chehab        if param not in self.entry.parameterdescs and not param.startswith("#"):
535d966dc65SMauro Carvalho Chehab            self.entry.parameterdescs[param] = self.undescribed
536d966dc65SMauro Carvalho Chehab
5379cbc2d3bSMauro Carvalho Chehab            if "." not in param:
538d966dc65SMauro Carvalho Chehab                if decl_type == 'function':
539d966dc65SMauro Carvalho Chehab                    dname = f"{decl_type} parameter"
540d966dc65SMauro Carvalho Chehab                else:
541d966dc65SMauro Carvalho Chehab                    dname = f"{decl_type} member"
542d966dc65SMauro Carvalho Chehab
543e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
544d966dc65SMauro Carvalho Chehab                              f"{dname} '{param}' not described in '{declaration_name}'")
545d966dc65SMauro Carvalho Chehab
546d966dc65SMauro Carvalho Chehab        # Strip spaces from param so that it is one continuous string on
547d966dc65SMauro Carvalho Chehab        # parameterlist. This fixes a problem where check_sections()
548d966dc65SMauro Carvalho Chehab        # cannot find a parameter like "addr[6 + 2]" because it actually
549d966dc65SMauro Carvalho Chehab        # appears as "addr[6", "+", "2]" on the parameter list.
550d966dc65SMauro Carvalho Chehab        # However, it's better to maintain the param string unchanged for
551d966dc65SMauro Carvalho Chehab        # output, so just weaken the string compare in check_sections()
552d966dc65SMauro Carvalho Chehab        # to ignore "[blah" in a parameter string.
553d966dc65SMauro Carvalho Chehab
554d966dc65SMauro Carvalho Chehab        self.entry.parameterlist.append(param)
55504a383ceSMauro Carvalho Chehab        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
556d966dc65SMauro Carvalho Chehab        self.entry.parametertypes[param] = org_arg
557d966dc65SMauro Carvalho Chehab
558d966dc65SMauro Carvalho Chehab
559d966dc65SMauro Carvalho Chehab    def create_parameter_list(self, ln, decl_type, args,
560d966dc65SMauro Carvalho Chehab                              splitter, declaration_name):
561d966dc65SMauro Carvalho Chehab        """
562d966dc65SMauro Carvalho Chehab        Creates a list of parameters, storing them at self.entry.
563d966dc65SMauro Carvalho Chehab        """
564d966dc65SMauro Carvalho Chehab
565d966dc65SMauro Carvalho Chehab        # temporarily replace all commas inside function pointer definition
56604a383ceSMauro Carvalho Chehab        arg_expr = KernRe(r'(\([^\),]+),')
567d966dc65SMauro Carvalho Chehab        while arg_expr.search(args):
568d966dc65SMauro Carvalho Chehab            args = arg_expr.sub(r"\1#", args)
569d966dc65SMauro Carvalho Chehab
570d966dc65SMauro Carvalho Chehab        for arg in args.split(splitter):
571d966dc65SMauro Carvalho Chehab            # Ignore argument attributes
57204a383ceSMauro Carvalho Chehab            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
573d966dc65SMauro Carvalho Chehab
574d966dc65SMauro Carvalho Chehab            # Strip leading/trailing spaces
575d966dc65SMauro Carvalho Chehab            arg = arg.strip()
57604a383ceSMauro Carvalho Chehab            arg = KernRe(r'\s+').sub(' ', arg, count=1)
577d966dc65SMauro Carvalho Chehab
578d966dc65SMauro Carvalho Chehab            if arg.startswith('#'):
579d966dc65SMauro Carvalho Chehab                # Treat preprocessor directive as a typeless variable just to fill
580d966dc65SMauro Carvalho Chehab                # corresponding data structures "correctly". Catch it later in
581d966dc65SMauro Carvalho Chehab                # output_* subs.
582d966dc65SMauro Carvalho Chehab
583d966dc65SMauro Carvalho Chehab                # Treat preprocessor directive as a typeless variable
584d966dc65SMauro Carvalho Chehab                self.push_parameter(ln, decl_type, arg, "",
585d966dc65SMauro Carvalho Chehab                                    "", declaration_name)
586e5d91662SJonathan Corbet            #
587e5d91662SJonathan Corbet            # The pointer-to-function case.
588e5d91662SJonathan Corbet            #
58904a383ceSMauro Carvalho Chehab            elif KernRe(r'\(.+\)\s*\(').search(arg):
590d966dc65SMauro Carvalho Chehab                arg = arg.replace('#', ',')
591e5d91662SJonathan Corbet                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
592e5d91662SJonathan Corbet                           r'([\w\[\].]*)'    # Capture the name and possible [array]
593e5d91662SJonathan Corbet                           r'\s*\)')	      # Make sure the trailing ")" is there
594d966dc65SMauro Carvalho Chehab                if r.match(arg):
595d966dc65SMauro Carvalho Chehab                    param = r.group(1)
596d966dc65SMauro Carvalho Chehab                else:
597e3b42e94SMauro Carvalho Chehab                    self.emit_msg(ln, f"Invalid param: {arg}")
598d966dc65SMauro Carvalho Chehab                    param = arg
599e5d91662SJonathan Corbet                dtype = arg.replace(param, '')
600e5d91662SJonathan Corbet                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
601bf6b310dSJonathan Corbet            #
602bf6b310dSJonathan Corbet            # The array-of-pointers case.  Dig the parameter name out from the middle
603bf6b310dSJonathan Corbet            # of the declaration.
604bf6b310dSJonathan Corbet            #
60504a383ceSMauro Carvalho Chehab            elif KernRe(r'\(.+\)\s*\[').search(arg):
606bf6b310dSJonathan Corbet                r = KernRe(r'[^\(]+\(\s*\*\s*'		# Up to "(" and maybe "*"
607bf6b310dSJonathan Corbet                           r'([\w.]*?)'			# The actual pointer name
608bf6b310dSJonathan Corbet                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
609d966dc65SMauro Carvalho Chehab                if r.match(arg):
610d966dc65SMauro Carvalho Chehab                    param = r.group(1)
611d966dc65SMauro Carvalho Chehab                else:
612e3b42e94SMauro Carvalho Chehab                    self.emit_msg(ln, f"Invalid param: {arg}")
613d966dc65SMauro Carvalho Chehab                    param = arg
614bf6b310dSJonathan Corbet                dtype = arg.replace(param, '')
615bf6b310dSJonathan Corbet                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
616d966dc65SMauro Carvalho Chehab            elif arg:
617f51b42b9SJonathan Corbet                #
618f51b42b9SJonathan Corbet                # Clean up extraneous spaces and split the string at commas; the first
619f51b42b9SJonathan Corbet                # element of the resulting list will also include the type information.
620f51b42b9SJonathan Corbet                #
62104a383ceSMauro Carvalho Chehab                arg = KernRe(r'\s*:\s*').sub(":", arg)
62204a383ceSMauro Carvalho Chehab                arg = KernRe(r'\s*\[').sub('[', arg)
62304a383ceSMauro Carvalho Chehab                args = KernRe(r'\s*,\s*').split(arg)
624d966dc65SMauro Carvalho Chehab                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
62505d72fe0SJonathan Corbet                #
62605d72fe0SJonathan Corbet                # args[0] has a string of "type a".  If "a" includes an [array]
62705d72fe0SJonathan Corbet                # declaration, we want to not be fooled by any white space inside
62805d72fe0SJonathan Corbet                # the brackets, so detect and handle that case specially.
62905d72fe0SJonathan Corbet                #
63005d72fe0SJonathan Corbet                r = KernRe(r'^([^[\]]*\s+)(.*)$')
63105d72fe0SJonathan Corbet                if r.match(args[0]):
63205d72fe0SJonathan Corbet                    args[0] = r.group(2)
63305d72fe0SJonathan Corbet                    dtype = r.group(1)
634d966dc65SMauro Carvalho Chehab                else:
63505d72fe0SJonathan Corbet                    # No space in args[0]; this seems wrong but preserves previous behavior
63605d72fe0SJonathan Corbet                    dtype = ''
637d966dc65SMauro Carvalho Chehab
638670ec733SJonathan Corbet                bitfield_re = KernRe(r'(.*?):(\w+)')
639d966dc65SMauro Carvalho Chehab                for param in args:
6408f05fbc5SJonathan Corbet                    #
6418f05fbc5SJonathan Corbet                    # For pointers, shift the star(s) from the variable name to the
6428f05fbc5SJonathan Corbet                    # type declaration.
6438f05fbc5SJonathan Corbet                    #
64404a383ceSMauro Carvalho Chehab                    r = KernRe(r'^(\*+)\s*(.*)')
645670ec733SJonathan Corbet                    if r.match(param):
646d966dc65SMauro Carvalho Chehab                        self.push_parameter(ln, decl_type, r.group(2),
647d966dc65SMauro Carvalho Chehab                                            f"{dtype} {r.group(1)}",
648d966dc65SMauro Carvalho Chehab                                            arg, declaration_name)
6498f05fbc5SJonathan Corbet                    #
6508f05fbc5SJonathan Corbet                    # Perform a similar shift for bitfields.
6518f05fbc5SJonathan Corbet                    #
652670ec733SJonathan Corbet                    elif bitfield_re.search(param):
653d966dc65SMauro Carvalho Chehab                        if dtype != "":  # Skip unnamed bit-fields
654670ec733SJonathan Corbet                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
655670ec733SJonathan Corbet                                                f"{dtype}:{bitfield_re.group(2)}",
656d966dc65SMauro Carvalho Chehab                                                arg, declaration_name)
657d966dc65SMauro Carvalho Chehab                    else:
658d966dc65SMauro Carvalho Chehab                        self.push_parameter(ln, decl_type, param, dtype,
659d966dc65SMauro Carvalho Chehab                                            arg, declaration_name)
660d966dc65SMauro Carvalho Chehab
661636d4d9eSJonathan Corbet    def check_sections(self, ln, decl_name, decl_type):
662d966dc65SMauro Carvalho Chehab        """
663d966dc65SMauro Carvalho Chehab        Check for errors inside sections, emitting warnings if not found
664d966dc65SMauro Carvalho Chehab        parameters are described.
665d966dc65SMauro Carvalho Chehab        """
666636d4d9eSJonathan Corbet        for section in self.entry.sections:
667636d4d9eSJonathan Corbet            if section not in self.entry.parameterlist and \
668636d4d9eSJonathan Corbet               not known_sections.search(section):
669d966dc65SMauro Carvalho Chehab                if decl_type == 'function':
670d966dc65SMauro Carvalho Chehab                    dname = f"{decl_type} parameter"
671d966dc65SMauro Carvalho Chehab                else:
672d966dc65SMauro Carvalho Chehab                    dname = f"{decl_type} member"
673e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
674636d4d9eSJonathan Corbet                              f"Excess {dname} '{section}' description in '{decl_name}'")
675d966dc65SMauro Carvalho Chehab
676d966dc65SMauro Carvalho Chehab    def check_return_section(self, ln, declaration_name, return_type):
677d966dc65SMauro Carvalho Chehab        """
678d966dc65SMauro Carvalho Chehab        If the function doesn't return void, warns about the lack of a
679d966dc65SMauro Carvalho Chehab        return description.
680d966dc65SMauro Carvalho Chehab        """
681d966dc65SMauro Carvalho Chehab
682d966dc65SMauro Carvalho Chehab        if not self.config.wreturn:
683d966dc65SMauro Carvalho Chehab            return
684d966dc65SMauro Carvalho Chehab
685d966dc65SMauro Carvalho Chehab        # Ignore an empty return type (It's a macro)
686d966dc65SMauro Carvalho Chehab        # Ignore functions with a "void" return type (but not "void *")
68704a383ceSMauro Carvalho Chehab        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
688d966dc65SMauro Carvalho Chehab            return
689d966dc65SMauro Carvalho Chehab
690d966dc65SMauro Carvalho Chehab        if not self.entry.sections.get("Return", None):
691e3b42e94SMauro Carvalho Chehab            self.emit_msg(ln,
692d966dc65SMauro Carvalho Chehab                          f"No description found for return value of '{declaration_name}'")
693d966dc65SMauro Carvalho Chehab
6940f734412SJonathan Corbet    def split_struct_proto(self, proto):
69550206750SMauro Carvalho Chehab        """
69650206750SMauro Carvalho Chehab        Split apart a structure prototype; returns (struct|union, name,
69750206750SMauro Carvalho Chehab        members) or ``None``.
69850206750SMauro Carvalho Chehab        """
69950206750SMauro Carvalho Chehab
700d966dc65SMauro Carvalho Chehab        type_pattern = r'(struct|union)'
701d966dc65SMauro Carvalho Chehab        qualifiers = [
702d966dc65SMauro Carvalho Chehab            "__attribute__",
703d966dc65SMauro Carvalho Chehab            "__packed",
704d966dc65SMauro Carvalho Chehab            "__aligned",
705d966dc65SMauro Carvalho Chehab            "____cacheline_aligned_in_smp",
706d966dc65SMauro Carvalho Chehab            "____cacheline_aligned",
707d966dc65SMauro Carvalho Chehab        ]
708d966dc65SMauro Carvalho Chehab        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
709d966dc65SMauro Carvalho Chehab
71004a383ceSMauro Carvalho Chehab        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
711d966dc65SMauro Carvalho Chehab        if r.search(proto):
7120f734412SJonathan Corbet            return (r.group(1), r.group(2), r.group(3))
713d966dc65SMauro Carvalho Chehab        else:
71404a383ceSMauro Carvalho Chehab            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
715d966dc65SMauro Carvalho Chehab            if r.search(proto):
7160f734412SJonathan Corbet                return (r.group(1), r.group(3), r.group(2))
7170f734412SJonathan Corbet        return None
71850206750SMauro Carvalho Chehab
71977e3c875SJonathan Corbet    def rewrite_struct_members(self, members):
72050206750SMauro Carvalho Chehab        """
72150206750SMauro Carvalho Chehab        Process ``struct``/``union`` members from the most deeply nested
72250206750SMauro Carvalho Chehab        outward.
72350206750SMauro Carvalho Chehab
72450206750SMauro Carvalho Chehab        Rewrite the members of a ``struct`` or ``union`` for easier formatting
72550206750SMauro Carvalho Chehab        later on. Among other things, this function will turn a member like::
72650206750SMauro Carvalho Chehab
72750206750SMauro Carvalho Chehab          struct { inner_members; } foo;
72850206750SMauro Carvalho Chehab
72950206750SMauro Carvalho Chehab        into::
73050206750SMauro Carvalho Chehab
73150206750SMauro Carvalho Chehab          struct foo; inner_members;
73250206750SMauro Carvalho Chehab        """
73350206750SMauro Carvalho Chehab
734d966dc65SMauro Carvalho Chehab        #
73550206750SMauro Carvalho Chehab        # The trick is in the ``^{`` below - it prevents a match of an outer
73650206750SMauro Carvalho Chehab        # ``struct``/``union`` until the inner one has been munged
73750206750SMauro Carvalho Chehab        # (removing the ``{`` in the process).
738d966dc65SMauro Carvalho Chehab        #
739a8c4b0a8SJonathan Corbet        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
740a8c4b0a8SJonathan Corbet                                r'([^\{\};]+)' 	    # 1: possible name
741a8c4b0a8SJonathan Corbet                                r'(\{)'
742a8c4b0a8SJonathan Corbet                                r'([^\{\}]*)'       # 3: Contents of declaration
743a8c4b0a8SJonathan Corbet                                r'(\})'
744a8c4b0a8SJonathan Corbet                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
745d966dc65SMauro Carvalho Chehab        tuples = struct_members.findall(members)
746f8208676SJonathan Corbet        while tuples:
747d966dc65SMauro Carvalho Chehab            for t in tuples:
748d966dc65SMauro Carvalho Chehab                newmember = ""
749e6dd4e2aSJonathan Corbet                oldmember = "".join(t) # Reconstruct the original formatting
750e6dd4e2aSJonathan Corbet                dtype, name, lbr, content, rbr, rest, semi = t
751e6dd4e2aSJonathan Corbet                #
752e6dd4e2aSJonathan Corbet                # Pass through each field name, normalizing the form and formatting.
753e6dd4e2aSJonathan Corbet                #
754e6dd4e2aSJonathan Corbet                for s_id in rest.split(','):
755d966dc65SMauro Carvalho Chehab                    s_id = s_id.strip()
756e6dd4e2aSJonathan Corbet                    newmember += f"{dtype} {s_id}; "
757e6dd4e2aSJonathan Corbet                    #
758e6dd4e2aSJonathan Corbet                    # Remove bitfield/array/pointer info, getting the bare name.
759e6dd4e2aSJonathan Corbet                    #
76004a383ceSMauro Carvalho Chehab                    s_id = KernRe(r'[:\[].*').sub('', s_id)
76104a383ceSMauro Carvalho Chehab                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
762e6dd4e2aSJonathan Corbet                    #
763e6dd4e2aSJonathan Corbet                    # Pass through the members of this inner structure/union.
764e6dd4e2aSJonathan Corbet                    #
765d966dc65SMauro Carvalho Chehab                    for arg in content.split(';'):
766d966dc65SMauro Carvalho Chehab                        arg = arg.strip()
767e6dd4e2aSJonathan Corbet                        #
768e6dd4e2aSJonathan Corbet                        # Look for (type)(*name)(args) - pointer to function
769e6dd4e2aSJonathan Corbet                        #
7705fd513f0SJonathan Corbet                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
771d966dc65SMauro Carvalho Chehab                        if r.match(arg):
772e6dd4e2aSJonathan Corbet                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
773d966dc65SMauro Carvalho Chehab                            # Pointer-to-function
774d966dc65SMauro Carvalho Chehab                            if not s_id:
775d966dc65SMauro Carvalho Chehab                                # Anonymous struct/union
776d966dc65SMauro Carvalho Chehab                                newmember += f"{dtype}{name}{extra}; "
777d966dc65SMauro Carvalho Chehab                            else:
778d966dc65SMauro Carvalho Chehab                                newmember += f"{dtype}{s_id}.{name}{extra}; "
779e6dd4e2aSJonathan Corbet                        #
780e6dd4e2aSJonathan Corbet                        # Otherwise a non-function member.
781e6dd4e2aSJonathan Corbet                        #
782d966dc65SMauro Carvalho Chehab                        else:
783e6dd4e2aSJonathan Corbet                            #
784e6dd4e2aSJonathan Corbet                            # Remove bitmap and array portions and spaces around commas
785e6dd4e2aSJonathan Corbet                            #
78604a383ceSMauro Carvalho Chehab                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
78704a383ceSMauro Carvalho Chehab                            arg = KernRe(r'\[.*\]').sub('', arg)
78804a383ceSMauro Carvalho Chehab                            arg = KernRe(r'\s*,\s*').sub(',', arg)
789e6dd4e2aSJonathan Corbet                            #
790e6dd4e2aSJonathan Corbet                            # Look for a normal decl - "type name[,name...]"
791e6dd4e2aSJonathan Corbet                            #
79204a383ceSMauro Carvalho Chehab                            r = KernRe(r'(.*)\s+([\S+,]+)')
793d966dc65SMauro Carvalho Chehab                            if r.search(arg):
794e6dd4e2aSJonathan Corbet                                for name in r.group(2).split(','):
795e6dd4e2aSJonathan Corbet                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
796d966dc65SMauro Carvalho Chehab                                    if not s_id:
797d966dc65SMauro Carvalho Chehab                                        # Anonymous struct/union
798e6dd4e2aSJonathan Corbet                                        newmember += f"{r.group(1)} {name}; "
799d966dc65SMauro Carvalho Chehab                                    else:
800e6dd4e2aSJonathan Corbet                                        newmember += f"{r.group(1)} {s_id}.{name}; "
801e6dd4e2aSJonathan Corbet                            else:
802e6dd4e2aSJonathan Corbet                                newmember += f"{arg}; "
803e6dd4e2aSJonathan Corbet                #
804e6dd4e2aSJonathan Corbet                # At the end of the s_id loop, replace the original declaration with
805e6dd4e2aSJonathan Corbet                # the munged version.
806e6dd4e2aSJonathan Corbet                #
807d966dc65SMauro Carvalho Chehab                members = members.replace(oldmember, newmember)
808e6dd4e2aSJonathan Corbet            #
809e6dd4e2aSJonathan Corbet            # End of the tuple loop - search again and see if there are outer members
810e6dd4e2aSJonathan Corbet            # that now turn up.
811e6dd4e2aSJonathan Corbet            #
812f8208676SJonathan Corbet            tuples = struct_members.findall(members)
81377e3c875SJonathan Corbet        return members
814d966dc65SMauro Carvalho Chehab
81523c47b09SJonathan Corbet    def format_struct_decl(self, declaration):
81650206750SMauro Carvalho Chehab        """
81750206750SMauro Carvalho Chehab        Format the ``struct`` declaration into a standard form for inclusion
81850206750SMauro Carvalho Chehab        in the resulting docs.
81950206750SMauro Carvalho Chehab        """
82050206750SMauro Carvalho Chehab
82123c47b09SJonathan Corbet        #
82223c47b09SJonathan Corbet        # Insert newlines, get rid of extra spaces.
82323c47b09SJonathan Corbet        #
82404a383ceSMauro Carvalho Chehab        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
82504a383ceSMauro Carvalho Chehab        declaration = KernRe(r'\}\s+;').sub('};', declaration)
82623c47b09SJonathan Corbet        #
82723c47b09SJonathan Corbet        # Format inline enums with each member on its own line.
82823c47b09SJonathan Corbet        #
82904a383ceSMauro Carvalho Chehab        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
83023c47b09SJonathan Corbet        while r.search(declaration):
831d966dc65SMauro Carvalho Chehab            declaration = r.sub(r'\1,\n\2', declaration)
83223c47b09SJonathan Corbet        #
83323c47b09SJonathan Corbet        # Now go through and supply the right number of tabs
83423c47b09SJonathan Corbet        # for each line.
83523c47b09SJonathan Corbet        #
836d966dc65SMauro Carvalho Chehab        def_args = declaration.split('\n')
837d966dc65SMauro Carvalho Chehab        level = 1
838d966dc65SMauro Carvalho Chehab        declaration = ""
839d966dc65SMauro Carvalho Chehab        for clause in def_args:
84023c47b09SJonathan Corbet            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
84123c47b09SJonathan Corbet            if clause:
842d966dc65SMauro Carvalho Chehab                if '}' in clause and level > 1:
843d966dc65SMauro Carvalho Chehab                    level -= 1
84423c47b09SJonathan Corbet                if not clause.startswith('#'):
845d966dc65SMauro Carvalho Chehab                    declaration += "\t" * level
846d966dc65SMauro Carvalho Chehab                declaration += "\t" + clause + "\n"
847d966dc65SMauro Carvalho Chehab                if "{" in clause and "}" not in clause:
848d966dc65SMauro Carvalho Chehab                    level += 1
84923c47b09SJonathan Corbet        return declaration
850d966dc65SMauro Carvalho Chehab
85123c47b09SJonathan Corbet
85277e3c875SJonathan Corbet    def dump_struct(self, ln, proto):
85377e3c875SJonathan Corbet        """
85450206750SMauro Carvalho Chehab        Store an entry for a ``struct`` or ``union``
85577e3c875SJonathan Corbet        """
85677e3c875SJonathan Corbet        #
85777e3c875SJonathan Corbet        # Do the basic parse to get the pieces of the declaration.
85877e3c875SJonathan Corbet        #
85977e3c875SJonathan Corbet        struct_parts = self.split_struct_proto(proto)
86077e3c875SJonathan Corbet        if not struct_parts:
86177e3c875SJonathan Corbet            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
86277e3c875SJonathan Corbet            return
86377e3c875SJonathan Corbet        decl_type, declaration_name, members = struct_parts
86477e3c875SJonathan Corbet
86577e3c875SJonathan Corbet        if self.entry.identifier != declaration_name:
86677e3c875SJonathan Corbet            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
86777e3c875SJonathan Corbet                          f"Prototype was for {decl_type} {declaration_name} instead\n")
86877e3c875SJonathan Corbet            return
86977e3c875SJonathan Corbet        #
87077e3c875SJonathan Corbet        # Go through the list of members applying all of our transformations.
87177e3c875SJonathan Corbet        #
87277e3c875SJonathan Corbet        members = trim_private_members(members)
873a2752f8cSJonathan Corbet        members = apply_transforms(struct_xforms, members)
87477e3c875SJonathan Corbet
87577e3c875SJonathan Corbet        nested = NestedMatch()
87677e3c875SJonathan Corbet        for search, sub in struct_nested_prefixes:
87777e3c875SJonathan Corbet            members = nested.sub(search, sub, members)
878e282303eSJonathan Corbet        #
879e282303eSJonathan Corbet        # Deal with embedded struct and union members, and drop enums entirely.
880e282303eSJonathan Corbet        #
88177e3c875SJonathan Corbet        declaration = members
88277e3c875SJonathan Corbet        members = self.rewrite_struct_members(members)
883d966dc65SMauro Carvalho Chehab        members = re.sub(r'(\{[^\{\}]*\})', '', members)
884e282303eSJonathan Corbet        #
885e282303eSJonathan Corbet        # Output the result and we are done.
886e282303eSJonathan Corbet        #
887d966dc65SMauro Carvalho Chehab        self.create_parameter_list(ln, decl_type, members, ';',
888d966dc65SMauro Carvalho Chehab                                   declaration_name)
889636d4d9eSJonathan Corbet        self.check_sections(ln, declaration_name, decl_type)
890d966dc65SMauro Carvalho Chehab        self.output_declaration(decl_type, declaration_name,
89123c47b09SJonathan Corbet                                definition=self.format_struct_decl(declaration),
892d966dc65SMauro Carvalho Chehab                                purpose=self.entry.declaration_purpose)
893d966dc65SMauro Carvalho Chehab
894d966dc65SMauro Carvalho Chehab    def dump_enum(self, ln, proto):
895d966dc65SMauro Carvalho Chehab        """
89650206750SMauro Carvalho Chehab        Store an ``enum`` inside self.entries array.
897d966dc65SMauro Carvalho Chehab        """
8986656ae4dSJonathan Corbet        #
8996656ae4dSJonathan Corbet        # Strip preprocessor directives.  Note that this depends on the
9006656ae4dSJonathan Corbet        # trailing semicolon we added in process_proto_type().
9016656ae4dSJonathan Corbet        #
90204a383ceSMauro Carvalho Chehab        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
903d1af2889SJonathan Corbet        #
904d1af2889SJonathan Corbet        # Parse out the name and members of the enum.  Typedef form first.
905d1af2889SJonathan Corbet        #
90604a383ceSMauro Carvalho Chehab        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
907d966dc65SMauro Carvalho Chehab        if r.search(proto):
908d966dc65SMauro Carvalho Chehab            declaration_name = r.group(2)
9096656ae4dSJonathan Corbet            members = trim_private_members(r.group(1))
910d1af2889SJonathan Corbet        #
911d1af2889SJonathan Corbet        # Failing that, look for a straight enum
912d1af2889SJonathan Corbet        #
913d966dc65SMauro Carvalho Chehab        else:
91404a383ceSMauro Carvalho Chehab            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
915d966dc65SMauro Carvalho Chehab            if r.match(proto):
916d966dc65SMauro Carvalho Chehab                declaration_name = r.group(1)
9176656ae4dSJonathan Corbet                members = trim_private_members(r.group(2))
918d1af2889SJonathan Corbet        #
919d1af2889SJonathan Corbet        # OK, this isn't going to work.
920d1af2889SJonathan Corbet        #
921d1af2889SJonathan Corbet            else:
922e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
923d966dc65SMauro Carvalho Chehab                return
924d1af2889SJonathan Corbet        #
925d1af2889SJonathan Corbet        # Make sure we found what we were expecting.
926d1af2889SJonathan Corbet        #
927d966dc65SMauro Carvalho Chehab        if self.entry.identifier != declaration_name:
928d966dc65SMauro Carvalho Chehab            if self.entry.identifier == "":
929e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
930d966dc65SMauro Carvalho Chehab                              f"{proto}: wrong kernel-doc identifier on prototype")
931d966dc65SMauro Carvalho Chehab            else:
932e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
933d1af2889SJonathan Corbet                              f"expecting prototype for enum {self.entry.identifier}. "
934d1af2889SJonathan Corbet                              f"Prototype was for enum {declaration_name} instead")
935d966dc65SMauro Carvalho Chehab            return
936d966dc65SMauro Carvalho Chehab
937d966dc65SMauro Carvalho Chehab        if not declaration_name:
938d966dc65SMauro Carvalho Chehab            declaration_name = "(anonymous)"
939d1af2889SJonathan Corbet        #
940d1af2889SJonathan Corbet        # Parse out the name of each enum member, and verify that we
941d1af2889SJonathan Corbet        # have a description for it.
942d1af2889SJonathan Corbet        #
943d966dc65SMauro Carvalho Chehab        member_set = set()
944d1af2889SJonathan Corbet        members = KernRe(r'\([^;)]*\)').sub('', members)
945d966dc65SMauro Carvalho Chehab        for arg in members.split(','):
946d966dc65SMauro Carvalho Chehab            if not arg:
947d966dc65SMauro Carvalho Chehab                continue
94804a383ceSMauro Carvalho Chehab            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
949d966dc65SMauro Carvalho Chehab            self.entry.parameterlist.append(arg)
950d966dc65SMauro Carvalho Chehab            if arg not in self.entry.parameterdescs:
951d966dc65SMauro Carvalho Chehab                self.entry.parameterdescs[arg] = self.undescribed
952e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
953d966dc65SMauro Carvalho Chehab                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
954d966dc65SMauro Carvalho Chehab            member_set.add(arg)
955d1af2889SJonathan Corbet        #
956d1af2889SJonathan Corbet        # Ensure that every described member actually exists in the enum.
957d1af2889SJonathan Corbet        #
958d966dc65SMauro Carvalho Chehab        for k in self.entry.parameterdescs:
959d966dc65SMauro Carvalho Chehab            if k not in member_set:
960e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
96118182f97SRandy Dunlap                              f"Excess enum value '@{k}' description in '{declaration_name}'")
962d966dc65SMauro Carvalho Chehab
963d966dc65SMauro Carvalho Chehab        self.output_declaration('enum', declaration_name,
964d966dc65SMauro Carvalho Chehab                                purpose=self.entry.declaration_purpose)
965d966dc65SMauro Carvalho Chehab
9661045ec38SMauro Carvalho Chehab    def dump_var(self, ln, proto):
9671045ec38SMauro Carvalho Chehab        """
9681045ec38SMauro Carvalho Chehab        Store variables that are part of kAPI.
9691045ec38SMauro Carvalho Chehab        """
9701045ec38SMauro Carvalho Chehab        VAR_ATTRIBS = [
9711045ec38SMauro Carvalho Chehab            "extern",
9721045ec38SMauro Carvalho Chehab        ]
9731045ec38SMauro Carvalho Chehab        OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
9741045ec38SMauro Carvalho Chehab
9751045ec38SMauro Carvalho Chehab        sub_prefixes = [
9761045ec38SMauro Carvalho Chehab            (KernRe(r"__read_mostly"), ""),
9771045ec38SMauro Carvalho Chehab            (KernRe(r"__ro_after_init"), ""),
9781045ec38SMauro Carvalho Chehab            (KernRe(r"(?://.*)$"), ""),
9791045ec38SMauro Carvalho Chehab            (KernRe(r"(?:/\*.*\*/)"), ""),
9801045ec38SMauro Carvalho Chehab            (KernRe(r";$"), ""),
9811045ec38SMauro Carvalho Chehab            (KernRe(r"=.*"), ""),
9821045ec38SMauro Carvalho Chehab        ]
9831045ec38SMauro Carvalho Chehab
9841045ec38SMauro Carvalho Chehab        #
9851045ec38SMauro Carvalho Chehab        # Store the full prototype before modifying it
9861045ec38SMauro Carvalho Chehab        #
9871045ec38SMauro Carvalho Chehab        full_proto = proto
988bdd1cf87SMauro Carvalho Chehab        declaration_name = None
989bdd1cf87SMauro Carvalho Chehab
990bdd1cf87SMauro Carvalho Chehab        #
991bdd1cf87SMauro Carvalho Chehab        # Handle macro definitions
992bdd1cf87SMauro Carvalho Chehab        #
993bdd1cf87SMauro Carvalho Chehab        macro_prefixes = [
994bdd1cf87SMauro Carvalho Chehab            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
995bdd1cf87SMauro Carvalho Chehab        ]
996bdd1cf87SMauro Carvalho Chehab
997bdd1cf87SMauro Carvalho Chehab        for r in macro_prefixes:
998bdd1cf87SMauro Carvalho Chehab            match = r.search(proto)
999bdd1cf87SMauro Carvalho Chehab            if match:
1000bdd1cf87SMauro Carvalho Chehab                declaration_name = match.group(1)
1001bdd1cf87SMauro Carvalho Chehab                break
10021045ec38SMauro Carvalho Chehab
10031045ec38SMauro Carvalho Chehab        #
10041045ec38SMauro Carvalho Chehab        # Drop comments and macros to have a pure C prototype
10051045ec38SMauro Carvalho Chehab        #
1006bdd1cf87SMauro Carvalho Chehab        if not declaration_name:
1007bdd1cf87SMauro Carvalho Chehab            for r, sub in sub_prefixes:
1008bdd1cf87SMauro Carvalho Chehab                proto = r.sub(sub, proto)
10091045ec38SMauro Carvalho Chehab
10101045ec38SMauro Carvalho Chehab        proto = proto.rstrip()
10111045ec38SMauro Carvalho Chehab
10121045ec38SMauro Carvalho Chehab        #
10131045ec38SMauro Carvalho Chehab        # Variable name is at the end of the declaration
10141045ec38SMauro Carvalho Chehab        #
10151045ec38SMauro Carvalho Chehab
1016aaacd70fSMauro Carvalho Chehab        default_val = None
1017aaacd70fSMauro Carvalho Chehab
10181045ec38SMauro Carvalho Chehab        r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
1019aaacd70fSMauro Carvalho Chehab        if r.match(proto):
1020bdd1cf87SMauro Carvalho Chehab            if not declaration_name:
10211045ec38SMauro Carvalho Chehab                declaration_name = r.group(1)
1022bdd1cf87SMauro Carvalho Chehab
10231045ec38SMauro Carvalho Chehab            default_val = r.group(2)
1024aaacd70fSMauro Carvalho Chehab        else:
1025aaacd70fSMauro Carvalho Chehab            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
1026aaacd70fSMauro Carvalho Chehab        if r.match(proto):
1027aaacd70fSMauro Carvalho Chehab            default_val = r.group(1)
1028aaacd70fSMauro Carvalho Chehab
1029aaacd70fSMauro Carvalho Chehab        if not declaration_name:
1030aaacd70fSMauro Carvalho Chehab           self.emit_msg(ln,f"{proto}: can't parse variable")
1031aaacd70fSMauro Carvalho Chehab           return
1032aaacd70fSMauro Carvalho Chehab
10331045ec38SMauro Carvalho Chehab        if default_val:
10341045ec38SMauro Carvalho Chehab            default_val = default_val.lstrip("=").strip()
10351045ec38SMauro Carvalho Chehab
10361045ec38SMauro Carvalho Chehab        self.output_declaration("var", declaration_name,
10371045ec38SMauro Carvalho Chehab                                full_proto=full_proto,
10381045ec38SMauro Carvalho Chehab                                default_val=default_val,
10391045ec38SMauro Carvalho Chehab                                purpose=self.entry.declaration_purpose)
10401045ec38SMauro Carvalho Chehab
1041d966dc65SMauro Carvalho Chehab    def dump_declaration(self, ln, prototype):
1042d966dc65SMauro Carvalho Chehab        """
104350206750SMauro Carvalho Chehab        Store a data declaration inside self.entries array.
1044d966dc65SMauro Carvalho Chehab        """
1045d966dc65SMauro Carvalho Chehab
1046d966dc65SMauro Carvalho Chehab        if self.entry.decl_type == "enum":
1047d966dc65SMauro Carvalho Chehab            self.dump_enum(ln, prototype)
104808b8dc43SJonathan Corbet        elif self.entry.decl_type == "typedef":
1049d966dc65SMauro Carvalho Chehab            self.dump_typedef(ln, prototype)
105008b8dc43SJonathan Corbet        elif self.entry.decl_type in ["union", "struct"]:
1051d966dc65SMauro Carvalho Chehab            self.dump_struct(ln, prototype)
10521045ec38SMauro Carvalho Chehab        elif self.entry.decl_type == "var":
10531045ec38SMauro Carvalho Chehab            self.dump_var(ln, prototype)
105408b8dc43SJonathan Corbet        else:
105508b8dc43SJonathan Corbet            # This would be a bug
105608b8dc43SJonathan Corbet            self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
1057d966dc65SMauro Carvalho Chehab
1058d966dc65SMauro Carvalho Chehab    def dump_function(self, ln, prototype):
1059d966dc65SMauro Carvalho Chehab        """
106050206750SMauro Carvalho Chehab        Store a function or function macro inside self.entries array.
1061d966dc65SMauro Carvalho Chehab        """
1062d966dc65SMauro Carvalho Chehab
1063370f4305SJonathan Corbet        found = func_macro = False
1064d966dc65SMauro Carvalho Chehab        return_type = ''
1065d966dc65SMauro Carvalho Chehab        decl_type = 'function'
10664c232a81SJonathan Corbet        #
10674c232a81SJonathan Corbet        # Apply the initial transformations.
10684c232a81SJonathan Corbet        #
1069a2752f8cSJonathan Corbet        prototype = apply_transforms(function_xforms, prototype)
1070370f4305SJonathan Corbet        #
1071370f4305SJonathan Corbet        # If we have a macro, remove the "#define" at the front.
1072370f4305SJonathan Corbet        #
107304a383ceSMauro Carvalho Chehab        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
1074d966dc65SMauro Carvalho Chehab        if new_proto != prototype:
1075d966dc65SMauro Carvalho Chehab            prototype = new_proto
1076370f4305SJonathan Corbet            #
1077370f4305SJonathan Corbet            # Dispense with the simple "#define A B" case here; the key
1078370f4305SJonathan Corbet            # is the space after the name of the symbol being defined.
1079370f4305SJonathan Corbet            # NOTE that the seemingly misnamed "func_macro" indicates a
1080370f4305SJonathan Corbet            # macro *without* arguments.
1081370f4305SJonathan Corbet            #
1082370f4305SJonathan Corbet            r = KernRe(r'^(\w+)\s+')
1083370f4305SJonathan Corbet            if r.search(prototype):
1084370f4305SJonathan Corbet                return_type = ''
1085370f4305SJonathan Corbet                declaration_name = r.group(1)
1086370f4305SJonathan Corbet                func_macro = True
1087370f4305SJonathan Corbet                found = True
1088d966dc65SMauro Carvalho Chehab
1089d966dc65SMauro Carvalho Chehab        # Yes, this truly is vile.  We are looking for:
1090d966dc65SMauro Carvalho Chehab        # 1. Return type (may be nothing if we're looking at a macro)
1091d966dc65SMauro Carvalho Chehab        # 2. Function name
1092d966dc65SMauro Carvalho Chehab        # 3. Function parameters.
1093d966dc65SMauro Carvalho Chehab        #
1094d966dc65SMauro Carvalho Chehab        # All the while we have to watch out for function pointer parameters
1095d966dc65SMauro Carvalho Chehab        # (which IIRC is what the two sections are for), C types (these
1096d966dc65SMauro Carvalho Chehab        # regexps don't even start to express all the possibilities), and
1097d966dc65SMauro Carvalho Chehab        # so on.
1098d966dc65SMauro Carvalho Chehab        #
1099d966dc65SMauro Carvalho Chehab        # If you mess with these regexps, it's a good idea to check that
1100d966dc65SMauro Carvalho Chehab        # the following functions' documentation still comes out right:
1101d966dc65SMauro Carvalho Chehab        # - parport_register_device (function pointer parameters)
1102d966dc65SMauro Carvalho Chehab        # - atomic_set (macro)
1103d966dc65SMauro Carvalho Chehab        # - pci_match_device, __copy_to_user (long return type)
1104d966dc65SMauro Carvalho Chehab
1105fee63c8fSJonathan Corbet        name = r'\w+'
1106d966dc65SMauro Carvalho Chehab        type1 = r'(?:[\w\s]+)?'
1107d966dc65SMauro Carvalho Chehab        type2 = r'(?:[\w\s]+\*+)+'
1108ff1f2af3SJonathan Corbet        #
1109ff1f2af3SJonathan Corbet        # Attempt to match first on (args) with no internal parentheses; this
1110ff1f2af3SJonathan Corbet        # lets us easily filter out __acquires() and other post-args stuff.  If
1111ff1f2af3SJonathan Corbet        # that fails, just grab the rest of the line to the last closing
1112ff1f2af3SJonathan Corbet        # parenthesis.
1113ff1f2af3SJonathan Corbet        #
1114ff1f2af3SJonathan Corbet        proto_args = r'\(([^\(]*|.*)\)'
1115370f4305SJonathan Corbet        #
1116370f4305SJonathan Corbet        # (Except for the simple macro case) attempt to split up the prototype
1117370f4305SJonathan Corbet        # in the various ways we understand.
1118370f4305SJonathan Corbet        #
1119d966dc65SMauro Carvalho Chehab        if not found:
1120d966dc65SMauro Carvalho Chehab            patterns = [
1121ff1f2af3SJonathan Corbet                rf'^()({name})\s*{proto_args}',
1122ff1f2af3SJonathan Corbet                rf'^({type1})\s+({name})\s*{proto_args}',
1123ff1f2af3SJonathan Corbet                rf'^({type2})\s*({name})\s*{proto_args}',
1124d966dc65SMauro Carvalho Chehab            ]
1125d966dc65SMauro Carvalho Chehab
1126d966dc65SMauro Carvalho Chehab            for p in patterns:
112704a383ceSMauro Carvalho Chehab                r = KernRe(p)
1128d966dc65SMauro Carvalho Chehab                if r.match(prototype):
1129d966dc65SMauro Carvalho Chehab                    return_type = r.group(1)
1130d966dc65SMauro Carvalho Chehab                    declaration_name = r.group(2)
1131d966dc65SMauro Carvalho Chehab                    args = r.group(3)
1132d966dc65SMauro Carvalho Chehab                    self.create_parameter_list(ln, decl_type, args, ',',
1133d966dc65SMauro Carvalho Chehab                                               declaration_name)
1134d966dc65SMauro Carvalho Chehab                    found = True
1135d966dc65SMauro Carvalho Chehab                    break
11363dff5441SJonathan Corbet        #
11373dff5441SJonathan Corbet        # Parsing done; make sure that things are as we expect.
11383dff5441SJonathan Corbet        #
1139d966dc65SMauro Carvalho Chehab        if not found:
1140e3b42e94SMauro Carvalho Chehab            self.emit_msg(ln,
1141d966dc65SMauro Carvalho Chehab                          f"cannot understand function prototype: '{prototype}'")
1142d966dc65SMauro Carvalho Chehab            return
1143d966dc65SMauro Carvalho Chehab        if self.entry.identifier != declaration_name:
11443dff5441SJonathan Corbet            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
11453dff5441SJonathan Corbet                          f"Prototype was for {declaration_name}() instead")
1146d966dc65SMauro Carvalho Chehab            return
1147636d4d9eSJonathan Corbet        self.check_sections(ln, declaration_name, "function")
1148d966dc65SMauro Carvalho Chehab        self.check_return_section(ln, declaration_name, return_type)
11493dff5441SJonathan Corbet        #
11503dff5441SJonathan Corbet        # Store the result.
11513dff5441SJonathan Corbet        #
11523dff5441SJonathan Corbet        self.output_declaration(decl_type, declaration_name,
11533dff5441SJonathan Corbet                                typedef=('typedef' in return_type),
11543dff5441SJonathan Corbet                                functiontype=return_type,
11553dff5441SJonathan Corbet                                purpose=self.entry.declaration_purpose,
11563dff5441SJonathan Corbet                                func_macro=func_macro)
1157d966dc65SMauro Carvalho Chehab
1158d966dc65SMauro Carvalho Chehab
1159d966dc65SMauro Carvalho Chehab    def dump_typedef(self, ln, proto):
1160d966dc65SMauro Carvalho Chehab        """
116150206750SMauro Carvalho Chehab        Store a ``typedef`` inside self.entries array.
1162d966dc65SMauro Carvalho Chehab        """
1163c0187843SJonathan Corbet        #
1164c0187843SJonathan Corbet        # We start by looking for function typedefs.
1165c0187843SJonathan Corbet        #
1166c0187843SJonathan Corbet        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
1167d966dc65SMauro Carvalho Chehab        typedef_ident = r'\*?\s*(\w\S+)\s*'
1168d966dc65SMauro Carvalho Chehab        typedef_args = r'\s*\((.*)\);'
1169d966dc65SMauro Carvalho Chehab
1170c0187843SJonathan Corbet        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
1171c0187843SJonathan Corbet        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
1172d966dc65SMauro Carvalho Chehab
1173d966dc65SMauro Carvalho Chehab        # Parse function typedef prototypes
1174d966dc65SMauro Carvalho Chehab        for r in [typedef1, typedef2]:
1175d966dc65SMauro Carvalho Chehab            if not r.match(proto):
1176d966dc65SMauro Carvalho Chehab                continue
1177d966dc65SMauro Carvalho Chehab
1178d966dc65SMauro Carvalho Chehab            return_type = r.group(1).strip()
1179d966dc65SMauro Carvalho Chehab            declaration_name = r.group(2)
1180d966dc65SMauro Carvalho Chehab            args = r.group(3)
1181d966dc65SMauro Carvalho Chehab
1182d966dc65SMauro Carvalho Chehab            if self.entry.identifier != declaration_name:
1183e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
1184d966dc65SMauro Carvalho Chehab                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1185d966dc65SMauro Carvalho Chehab                return
1186d966dc65SMauro Carvalho Chehab
1187c0187843SJonathan Corbet            self.create_parameter_list(ln, 'function', args, ',', declaration_name)
1188d966dc65SMauro Carvalho Chehab
1189c0187843SJonathan Corbet            self.output_declaration('function', declaration_name,
1190d966dc65SMauro Carvalho Chehab                                    typedef=True,
1191d966dc65SMauro Carvalho Chehab                                    functiontype=return_type,
1192d966dc65SMauro Carvalho Chehab                                    purpose=self.entry.declaration_purpose)
1193d966dc65SMauro Carvalho Chehab            return
1194c0187843SJonathan Corbet        #
1195c0187843SJonathan Corbet        # Not a function, try to parse a simple typedef.
1196c0187843SJonathan Corbet        #
119704a383ceSMauro Carvalho Chehab        r = KernRe(r'typedef.*\s+(\w+)\s*;')
1198d966dc65SMauro Carvalho Chehab        if r.match(proto):
1199d966dc65SMauro Carvalho Chehab            declaration_name = r.group(1)
1200d966dc65SMauro Carvalho Chehab
1201d966dc65SMauro Carvalho Chehab            if self.entry.identifier != declaration_name:
1202e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
1203e3b42e94SMauro Carvalho Chehab                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1204d966dc65SMauro Carvalho Chehab                return
1205d966dc65SMauro Carvalho Chehab
1206d966dc65SMauro Carvalho Chehab            self.output_declaration('typedef', declaration_name,
1207d966dc65SMauro Carvalho Chehab                                    purpose=self.entry.declaration_purpose)
1208d966dc65SMauro Carvalho Chehab            return
1209d966dc65SMauro Carvalho Chehab
1210e3b42e94SMauro Carvalho Chehab        self.emit_msg(ln, "error: Cannot parse typedef!")
1211d966dc65SMauro Carvalho Chehab
1212d966dc65SMauro Carvalho Chehab    @staticmethod
121316740c29SMauro Carvalho Chehab    def process_export(function_set, line):
1214d966dc65SMauro Carvalho Chehab        """
121550206750SMauro Carvalho Chehab        process ``EXPORT_SYMBOL*`` tags
1216d966dc65SMauro Carvalho Chehab
121716740c29SMauro Carvalho Chehab        This method doesn't use any variable from the class, so declare it
121816740c29SMauro Carvalho Chehab        with a staticmethod decorator.
1219d966dc65SMauro Carvalho Chehab        """
1220d966dc65SMauro Carvalho Chehab
122127ad33b6SMatthew Wilcox (Oracle)        # We support documenting some exported symbols with different
122227ad33b6SMatthew Wilcox (Oracle)        # names.  A horrible hack.
122327ad33b6SMatthew Wilcox (Oracle)        suffixes = [ '_noprof' ]
122427ad33b6SMatthew Wilcox (Oracle)
122516740c29SMauro Carvalho Chehab        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
122616740c29SMauro Carvalho Chehab        # multiple export lines would violate Kernel coding style.
122716740c29SMauro Carvalho Chehab
1228d966dc65SMauro Carvalho Chehab        if export_symbol.search(line):
1229d966dc65SMauro Carvalho Chehab            symbol = export_symbol.group(2)
1230473734e0SJonathan Corbet        elif export_symbol_ns.search(line):
1231d966dc65SMauro Carvalho Chehab            symbol = export_symbol_ns.group(2)
1232473734e0SJonathan Corbet        else:
1233473734e0SJonathan Corbet            return False
1234473734e0SJonathan Corbet        #
1235473734e0SJonathan Corbet        # Found an export, trim out any special suffixes
1236473734e0SJonathan Corbet        #
123727ad33b6SMatthew Wilcox (Oracle)        for suffix in suffixes:
123839e39af7SMauro Carvalho Chehab            # Be backward compatible with Python < 3.9
123939e39af7SMauro Carvalho Chehab            if symbol.endswith(suffix):
124039e39af7SMauro Carvalho Chehab                symbol = symbol[:-len(suffix)]
124116740c29SMauro Carvalho Chehab        function_set.add(symbol)
1242473734e0SJonathan Corbet        return True
1243d966dc65SMauro Carvalho Chehab
1244d966dc65SMauro Carvalho Chehab    def process_normal(self, ln, line):
1245d966dc65SMauro Carvalho Chehab        """
124650206750SMauro Carvalho Chehab        STATE_NORMAL: looking for the ``/**`` to begin everything.
1247d966dc65SMauro Carvalho Chehab        """
1248d966dc65SMauro Carvalho Chehab
1249d966dc65SMauro Carvalho Chehab        if not doc_start.match(line):
1250d966dc65SMauro Carvalho Chehab            return
1251d966dc65SMauro Carvalho Chehab
1252d966dc65SMauro Carvalho Chehab        # start a new entry
1253c3597ab2SMauro Carvalho Chehab        self.reset_state(ln)
1254d966dc65SMauro Carvalho Chehab
1255d966dc65SMauro Carvalho Chehab        # next line is always the function name
1256f9cdbc57SMauro Carvalho Chehab        self.state = state.NAME
1257d966dc65SMauro Carvalho Chehab
1258d966dc65SMauro Carvalho Chehab    def process_name(self, ln, line):
1259d966dc65SMauro Carvalho Chehab        """
1260d966dc65SMauro Carvalho Chehab        STATE_NAME: Looking for the "name - description" line
1261d966dc65SMauro Carvalho Chehab        """
12628666a352SJonathan Corbet        #
12638666a352SJonathan Corbet        # Check for a DOC: block and handle them specially.
12648666a352SJonathan Corbet        #
1265d966dc65SMauro Carvalho Chehab        if doc_block.search(line):
1266d966dc65SMauro Carvalho Chehab
1267d966dc65SMauro Carvalho Chehab            if not doc_block.group(1):
1268df275526SJonathan Corbet                self.entry.begin_section(ln, "Introduction")
1269d966dc65SMauro Carvalho Chehab            else:
1270df275526SJonathan Corbet                self.entry.begin_section(ln, doc_block.group(1))
1271d966dc65SMauro Carvalho Chehab
1272408269aeSMauro Carvalho Chehab            self.entry.identifier = self.entry.section
1273f9cdbc57SMauro Carvalho Chehab            self.state = state.DOCBLOCK
12748666a352SJonathan Corbet        #
12758666a352SJonathan Corbet        # Otherwise we're looking for a normal kerneldoc declaration line.
12768666a352SJonathan Corbet        #
12778666a352SJonathan Corbet        elif doc_decl.search(line):
1278d966dc65SMauro Carvalho Chehab            self.entry.identifier = doc_decl.group(1)
1279d966dc65SMauro Carvalho Chehab
1280d966dc65SMauro Carvalho Chehab            # Test for data declaration
12810682bde2SJonathan Corbet            if doc_begin_data.search(line):
12820682bde2SJonathan Corbet                self.entry.decl_type = doc_begin_data.group(1)
12830682bde2SJonathan Corbet                self.entry.identifier = doc_begin_data.group(2)
1284e76a1d2bSJonathan Corbet            #
1285e76a1d2bSJonathan Corbet            # Look for a function description
1286e76a1d2bSJonathan Corbet            #
12870682bde2SJonathan Corbet            elif doc_begin_func.search(line):
12880682bde2SJonathan Corbet                self.entry.identifier = doc_begin_func.group(1)
1289d966dc65SMauro Carvalho Chehab                self.entry.decl_type = "function"
12908f4650feSJonathan Corbet            #
12918f4650feSJonathan Corbet            # We struck out.
12928f4650feSJonathan Corbet            #
1293d966dc65SMauro Carvalho Chehab            else:
1294e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
12955f88f44dSRandy Dunlap                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
1296f9cdbc57SMauro Carvalho Chehab                self.state = state.NORMAL
12978f4650feSJonathan Corbet                return
12988666a352SJonathan Corbet            #
12998666a352SJonathan Corbet            # OK, set up for a new kerneldoc entry.
13008666a352SJonathan Corbet            #
1301d966dc65SMauro Carvalho Chehab            self.state = state.BODY
13028666a352SJonathan Corbet            self.entry.identifier = self.entry.identifier.strip(" ")
1303d966dc65SMauro Carvalho Chehab            # if there's no @param blocks need to set up default section here
1304df275526SJonathan Corbet            self.entry.begin_section(ln + 1)
13058666a352SJonathan Corbet            #
13068666a352SJonathan Corbet            # Find the description portion, which *should* be there but
13078666a352SJonathan Corbet            # isn't always.
13088666a352SJonathan Corbet            # (We should be able to capture this from the previous parsing - someday)
13098666a352SJonathan Corbet            #
1310d966dc65SMauro Carvalho Chehab            r = KernRe("[-:](.*)")
1311d966dc65SMauro Carvalho Chehab            if r.search(line):
1312f9b4cf2eSJonathan Corbet                self.entry.declaration_purpose = trim_whitespace(r.group(1))
1313e4153a22SJonathan Corbet                self.state = state.DECLARATION
1314d966dc65SMauro Carvalho Chehab            else:
1315d966dc65SMauro Carvalho Chehab                self.entry.declaration_purpose = ""
1316d966dc65SMauro Carvalho Chehab
1317d966dc65SMauro Carvalho Chehab            if not self.entry.declaration_purpose and self.config.wshort_desc:
1318e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
1319d966dc65SMauro Carvalho Chehab                              f"missing initial short description on line:\n{line}")
1320d966dc65SMauro Carvalho Chehab
1321d966dc65SMauro Carvalho Chehab            if not self.entry.identifier and self.entry.decl_type != "enum":
1322e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
1323d966dc65SMauro Carvalho Chehab                              f"wrong kernel-doc identifier on line:\n{line}")
1324f9cdbc57SMauro Carvalho Chehab                self.state = state.NORMAL
1325d966dc65SMauro Carvalho Chehab
1326d966dc65SMauro Carvalho Chehab            if self.config.verbose:
1327e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln,
1328d966dc65SMauro Carvalho Chehab                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
1329d966dc65SMauro Carvalho Chehab                                  warning=False)
13308666a352SJonathan Corbet        #
1331d966dc65SMauro Carvalho Chehab        # Failed to find an identifier. Emit a warning
13328666a352SJonathan Corbet        #
13338666a352SJonathan Corbet        else:
1334e3b42e94SMauro Carvalho Chehab            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
1335d966dc65SMauro Carvalho Chehab
133699327067SJonathan Corbet    def is_new_section(self, ln, line):
133750206750SMauro Carvalho Chehab        """
133850206750SMauro Carvalho Chehab        Helper function to determine if a new section is being started.
133950206750SMauro Carvalho Chehab        """
1340d966dc65SMauro Carvalho Chehab        if doc_sect.search(line):
1341e65d54e1SJonathan Corbet            self.state = state.BODY
1342e65d54e1SJonathan Corbet            #
1343e65d54e1SJonathan Corbet            # Pick out the name of our new section, tweaking it if need be.
1344e65d54e1SJonathan Corbet            #
1345d966dc65SMauro Carvalho Chehab            newsection = doc_sect.group(1)
1346e65d54e1SJonathan Corbet            if newsection.lower() == 'description':
1347e65d54e1SJonathan Corbet                newsection = 'Description'
1348e65d54e1SJonathan Corbet            elif newsection.lower() == 'context':
1349e65d54e1SJonathan Corbet                newsection = 'Context'
1350e65d54e1SJonathan Corbet                self.state = state.SPECIAL_SECTION
1351e65d54e1SJonathan Corbet            elif newsection.lower() in ["@return", "@returns",
1352d966dc65SMauro Carvalho Chehab                                        "return", "returns"]:
1353d966dc65SMauro Carvalho Chehab                newsection = "Return"
1354e65d54e1SJonathan Corbet                self.state = state.SPECIAL_SECTION
1355e65d54e1SJonathan Corbet            elif newsection[0] == '@':
1356e65d54e1SJonathan Corbet                self.state = state.SPECIAL_SECTION
1357e65d54e1SJonathan Corbet            #
1358e65d54e1SJonathan Corbet            # Initialize the contents, and get the new section going.
1359e65d54e1SJonathan Corbet            #
1360d966dc65SMauro Carvalho Chehab            newcontents = doc_sect.group(2)
1361d966dc65SMauro Carvalho Chehab            if not newcontents:
1362d966dc65SMauro Carvalho Chehab                newcontents = ""
1363d966dc65SMauro Carvalho Chehab            self.dump_section()
1364e4153a22SJonathan Corbet            self.entry.begin_section(ln, newsection)
1365d966dc65SMauro Carvalho Chehab            self.entry.leading_space = None
1366d966dc65SMauro Carvalho Chehab
1367d6699d5fSJonathan Corbet            self.entry.add_text(newcontents.lstrip())
136899327067SJonathan Corbet            return True
136999327067SJonathan Corbet        return False
1370d966dc65SMauro Carvalho Chehab
13712ad02b94SJonathan Corbet    def is_comment_end(self, ln, line):
137250206750SMauro Carvalho Chehab        """
137350206750SMauro Carvalho Chehab        Helper function to detect (and effect) the end of a kerneldoc comment.
137450206750SMauro Carvalho Chehab        """
1375d966dc65SMauro Carvalho Chehab        if doc_end.search(line):
1376d966dc65SMauro Carvalho Chehab            self.dump_section()
1377d966dc65SMauro Carvalho Chehab
1378d966dc65SMauro Carvalho Chehab            # Look for doc_com + <text> + doc_end:
13795fd513f0SJonathan Corbet            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')
1380d966dc65SMauro Carvalho Chehab            if r.match(line):
1381e3b42e94SMauro Carvalho Chehab                self.emit_msg(ln, f"suspicious ending line: {line}")
1382d966dc65SMauro Carvalho Chehab
1383d966dc65SMauro Carvalho Chehab            self.entry.prototype = ""
1384d966dc65SMauro Carvalho Chehab            self.entry.new_start_line = ln + 1
1385d966dc65SMauro Carvalho Chehab
1386f9cdbc57SMauro Carvalho Chehab            self.state = state.PROTO
13872ad02b94SJonathan Corbet            return True
13882ad02b94SJonathan Corbet        return False
1389d966dc65SMauro Carvalho Chehab
13902ad02b94SJonathan Corbet
13912ad02b94SJonathan Corbet    def process_decl(self, ln, line):
13922ad02b94SJonathan Corbet        """
139350206750SMauro Carvalho Chehab        STATE_DECLARATION: We've seen the beginning of a declaration.
13942ad02b94SJonathan Corbet        """
13952ad02b94SJonathan Corbet        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1396e4153a22SJonathan Corbet            return
1397ccad65a4SJonathan Corbet        #
1398ccad65a4SJonathan Corbet        # Look for anything with the " * " line beginning.
1399ccad65a4SJonathan Corbet        #
1400d966dc65SMauro Carvalho Chehab        if doc_content.search(line):
1401d966dc65SMauro Carvalho Chehab            cont = doc_content.group(1)
1402ccad65a4SJonathan Corbet            #
1403ccad65a4SJonathan Corbet            # A blank line means that we have moved out of the declaration
1404ccad65a4SJonathan Corbet            # part of the comment (without any "special section" parameter
1405ccad65a4SJonathan Corbet            # descriptions).
1406ccad65a4SJonathan Corbet            #
1407d966dc65SMauro Carvalho Chehab            if cont == "":
1408f9cdbc57SMauro Carvalho Chehab                self.state = state.BODY
1409ccad65a4SJonathan Corbet            #
1410ccad65a4SJonathan Corbet            # Otherwise we have more of the declaration section to soak up.
1411ccad65a4SJonathan Corbet            #
1412d966dc65SMauro Carvalho Chehab            else:
1413ccad65a4SJonathan Corbet                self.entry.declaration_purpose = \
1414ccad65a4SJonathan Corbet                    trim_whitespace(self.entry.declaration_purpose + ' ' + cont)
1415d966dc65SMauro Carvalho Chehab        else:
1416e4153a22SJonathan Corbet            # Unknown line, ignore
1417e4153a22SJonathan Corbet            self.emit_msg(ln, f"bad line: {line}")
1418e4153a22SJonathan Corbet
1419e4153a22SJonathan Corbet
142074cee0dfSJonathan Corbet    def process_special(self, ln, line):
142174cee0dfSJonathan Corbet        """
142250206750SMauro Carvalho Chehab        STATE_SPECIAL_SECTION: a section ending with a blank line.
142374cee0dfSJonathan Corbet        """
142407e04d8eSJonathan Corbet        #
142507e04d8eSJonathan Corbet        # If we have hit a blank line (only the " * " marker), then this
142607e04d8eSJonathan Corbet        # section is done.
142707e04d8eSJonathan Corbet        #
1428e65d54e1SJonathan Corbet        if KernRe(r"\s*\*\s*$").match(line):
142974cee0dfSJonathan Corbet            self.entry.begin_section(ln, dump = True)
1430f9cdbc57SMauro Carvalho Chehab            self.state = state.BODY
143107e04d8eSJonathan Corbet            return
143207e04d8eSJonathan Corbet        #
143307e04d8eSJonathan Corbet        # Not a blank line, look for the other ways to end the section.
143407e04d8eSJonathan Corbet        #
143507e04d8eSJonathan Corbet        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
143607e04d8eSJonathan Corbet            return
143707e04d8eSJonathan Corbet        #
143807e04d8eSJonathan Corbet        # OK, we should have a continuation of the text for this section.
143907e04d8eSJonathan Corbet        #
144007e04d8eSJonathan Corbet        if doc_content.search(line):
144107e04d8eSJonathan Corbet            cont = doc_content.group(1)
144207e04d8eSJonathan Corbet            #
144307e04d8eSJonathan Corbet            # If the lines of text after the first in a special section have
144407e04d8eSJonathan Corbet            # leading white space, we need to trim it out or Sphinx will get
144507e04d8eSJonathan Corbet            # confused.  For the second line (the None case), see what we
144607e04d8eSJonathan Corbet            # find there and remember it.
144707e04d8eSJonathan Corbet            #
1448d966dc65SMauro Carvalho Chehab            if self.entry.leading_space is None:
144904a383ceSMauro Carvalho Chehab                r = KernRe(r'^(\s+)')
1450d966dc65SMauro Carvalho Chehab                if r.match(cont):
1451d966dc65SMauro Carvalho Chehab                    self.entry.leading_space = len(r.group(1))
1452d966dc65SMauro Carvalho Chehab                else:
1453d966dc65SMauro Carvalho Chehab                    self.entry.leading_space = 0
145407e04d8eSJonathan Corbet            #
145507e04d8eSJonathan Corbet            # Otherwise, before trimming any leading chars, be *sure*
145607e04d8eSJonathan Corbet            # that they are white space.  We should maybe warn if this
145707e04d8eSJonathan Corbet            # isn't the case.
145807e04d8eSJonathan Corbet            #
1459d966dc65SMauro Carvalho Chehab            for i in range(0, self.entry.leading_space):
1460d966dc65SMauro Carvalho Chehab                if cont[i] != " ":
146107e04d8eSJonathan Corbet                    self.entry.leading_space = i
1462d966dc65SMauro Carvalho Chehab                    break
146307e04d8eSJonathan Corbet            #
146407e04d8eSJonathan Corbet            # Add the trimmed result to the section and we're done.
146507e04d8eSJonathan Corbet            #
1466d6699d5fSJonathan Corbet            self.entry.add_text(cont[self.entry.leading_space:])
146707e04d8eSJonathan Corbet        else:
1468d966dc65SMauro Carvalho Chehab            # Unknown line, ignore
1469e3b42e94SMauro Carvalho Chehab            self.emit_msg(ln, f"bad line: {line}")
1470d966dc65SMauro Carvalho Chehab
1471d966dc65SMauro Carvalho Chehab    def process_body(self, ln, line):
1472d966dc65SMauro Carvalho Chehab        """
1473e4153a22SJonathan Corbet        STATE_BODY: the bulk of a kerneldoc comment.
1474d966dc65SMauro Carvalho Chehab        """
14752ad02b94SJonathan Corbet        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1476d966dc65SMauro Carvalho Chehab            return
1477d966dc65SMauro Carvalho Chehab
1478d966dc65SMauro Carvalho Chehab        if doc_content.search(line):
1479d966dc65SMauro Carvalho Chehab            cont = doc_content.group(1)
1480d6699d5fSJonathan Corbet            self.entry.add_text(cont)
148107e04d8eSJonathan Corbet        else:
1482d966dc65SMauro Carvalho Chehab            # Unknown line, ignore
1483d966dc65SMauro Carvalho Chehab            self.emit_msg(ln, f"bad line: {line}")
1484d966dc65SMauro Carvalho Chehab
1485c7eedb09SJonathan Corbet    def process_inline_name(self, ln, line):
1486c7eedb09SJonathan Corbet        """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
1487d966dc65SMauro Carvalho Chehab
1488c7eedb09SJonathan Corbet        if doc_inline_sect.search(line):
1489df275526SJonathan Corbet            self.entry.begin_section(ln, doc_inline_sect.group(1))
1490d6699d5fSJonathan Corbet            self.entry.add_text(doc_inline_sect.group(2).lstrip())
1491096f73abSJonathan Corbet            self.state = state.INLINE_TEXT
1492c7eedb09SJonathan Corbet        elif doc_inline_end.search(line):
1493c7eedb09SJonathan Corbet            self.dump_section()
1494c7eedb09SJonathan Corbet            self.state = state.PROTO
1495c7eedb09SJonathan Corbet        elif doc_content.search(line):
1496c7eedb09SJonathan Corbet            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
1497c7eedb09SJonathan Corbet            self.state = state.PROTO
1498c7eedb09SJonathan Corbet        # else ... ??
1499c7eedb09SJonathan Corbet
1500c7eedb09SJonathan Corbet    def process_inline_text(self, ln, line):
1501c7eedb09SJonathan Corbet        """STATE_INLINE_TEXT: docbook comments within a prototype."""
1502d966dc65SMauro Carvalho Chehab
1503d966dc65SMauro Carvalho Chehab        if doc_inline_end.search(line):
1504d966dc65SMauro Carvalho Chehab            self.dump_section()
1505d966dc65SMauro Carvalho Chehab            self.state = state.PROTO
1506c7eedb09SJonathan Corbet        elif doc_content.search(line):
1507d6699d5fSJonathan Corbet            self.entry.add_text(doc_content.group(1))
1508c7eedb09SJonathan Corbet        # else ... ??
1509d966dc65SMauro Carvalho Chehab
1510d966dc65SMauro Carvalho Chehab    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
1511d966dc65SMauro Carvalho Chehab        """
151250206750SMauro Carvalho Chehab        Handle syscall definitions.
1513d966dc65SMauro Carvalho Chehab        """
1514d966dc65SMauro Carvalho Chehab
1515d966dc65SMauro Carvalho Chehab        is_void = False
1516d966dc65SMauro Carvalho Chehab
1517d966dc65SMauro Carvalho Chehab        # Strip newlines/CR's
1518d966dc65SMauro Carvalho Chehab        proto = re.sub(r'[\r\n]+', ' ', proto)
1519d966dc65SMauro Carvalho Chehab
1520d966dc65SMauro Carvalho Chehab        # Check if it's a SYSCALL_DEFINE0
1521d966dc65SMauro Carvalho Chehab        if 'SYSCALL_DEFINE0' in proto:
1522d966dc65SMauro Carvalho Chehab            is_void = True
1523d966dc65SMauro Carvalho Chehab
1524d966dc65SMauro Carvalho Chehab        # Replace SYSCALL_DEFINE with correct return type & function name
152504a383ceSMauro Carvalho Chehab        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
1526d966dc65SMauro Carvalho Chehab
152704a383ceSMauro Carvalho Chehab        r = KernRe(r'long\s+(sys_.*?),')
1528d966dc65SMauro Carvalho Chehab        if r.search(proto):
152904a383ceSMauro Carvalho Chehab            proto = KernRe(',').sub('(', proto, count=1)
1530d966dc65SMauro Carvalho Chehab        elif is_void:
153104a383ceSMauro Carvalho Chehab            proto = KernRe(r'\)').sub('(void)', proto, count=1)
1532d966dc65SMauro Carvalho Chehab
1533d966dc65SMauro Carvalho Chehab        # Now delete all of the odd-numbered commas in the proto
1534d966dc65SMauro Carvalho Chehab        # so that argument types & names don't have a comma between them
1535d966dc65SMauro Carvalho Chehab        count = 0
1536d966dc65SMauro Carvalho Chehab        length = len(proto)
1537d966dc65SMauro Carvalho Chehab
1538d966dc65SMauro Carvalho Chehab        if is_void:
1539d966dc65SMauro Carvalho Chehab            length = 0  # skip the loop if is_void
1540d966dc65SMauro Carvalho Chehab
1541d966dc65SMauro Carvalho Chehab        for ix in range(length):
1542d966dc65SMauro Carvalho Chehab            if proto[ix] == ',':
1543d966dc65SMauro Carvalho Chehab                count += 1
1544d966dc65SMauro Carvalho Chehab                if count % 2 == 1:
1545d966dc65SMauro Carvalho Chehab                    proto = proto[:ix] + ' ' + proto[ix + 1:]
1546d966dc65SMauro Carvalho Chehab
1547d966dc65SMauro Carvalho Chehab        return proto
1548d966dc65SMauro Carvalho Chehab
1549d966dc65SMauro Carvalho Chehab    def tracepoint_munge(self, ln, proto):
1550d966dc65SMauro Carvalho Chehab        """
155150206750SMauro Carvalho Chehab        Handle tracepoint definitions.
1552d966dc65SMauro Carvalho Chehab        """
1553d966dc65SMauro Carvalho Chehab
1554d966dc65SMauro Carvalho Chehab        tracepointname = None
1555d966dc65SMauro Carvalho Chehab        tracepointargs = None
1556d966dc65SMauro Carvalho Chehab
1557d966dc65SMauro Carvalho Chehab        # Match tracepoint name based on different patterns
155804a383ceSMauro Carvalho Chehab        r = KernRe(r'TRACE_EVENT\((.*?),')
1559d966dc65SMauro Carvalho Chehab        if r.search(proto):
1560d966dc65SMauro Carvalho Chehab            tracepointname = r.group(1)
1561d966dc65SMauro Carvalho Chehab
156204a383ceSMauro Carvalho Chehab        r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
1563d966dc65SMauro Carvalho Chehab        if r.search(proto):
1564d966dc65SMauro Carvalho Chehab            tracepointname = r.group(1)
1565d966dc65SMauro Carvalho Chehab
156604a383ceSMauro Carvalho Chehab        r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
1567d966dc65SMauro Carvalho Chehab        if r.search(proto):
1568d966dc65SMauro Carvalho Chehab            tracepointname = r.group(2)
1569d966dc65SMauro Carvalho Chehab
1570d966dc65SMauro Carvalho Chehab        if tracepointname:
1571d966dc65SMauro Carvalho Chehab            tracepointname = tracepointname.lstrip()
1572d966dc65SMauro Carvalho Chehab
157304a383ceSMauro Carvalho Chehab        r = KernRe(r'TP_PROTO\((.*?)\)')
1574d966dc65SMauro Carvalho Chehab        if r.search(proto):
1575d966dc65SMauro Carvalho Chehab            tracepointargs = r.group(1)
1576d966dc65SMauro Carvalho Chehab
1577d966dc65SMauro Carvalho Chehab        if not tracepointname or not tracepointargs:
1578e3b42e94SMauro Carvalho Chehab            self.emit_msg(ln,
1579d966dc65SMauro Carvalho Chehab                          f"Unrecognized tracepoint format:\n{proto}\n")
1580d966dc65SMauro Carvalho Chehab        else:
1581d966dc65SMauro Carvalho Chehab            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
1582d966dc65SMauro Carvalho Chehab            self.entry.identifier = f"trace_{self.entry.identifier}"
1583d966dc65SMauro Carvalho Chehab
1584d966dc65SMauro Carvalho Chehab        return proto
1585d966dc65SMauro Carvalho Chehab
1586d966dc65SMauro Carvalho Chehab    def process_proto_function(self, ln, line):
158750206750SMauro Carvalho Chehab        """Ancillary routine to process a function prototype."""
1588d966dc65SMauro Carvalho Chehab
1589d966dc65SMauro Carvalho Chehab        # strip C99-style comments to end of line
15905fd513f0SJonathan Corbet        line = KernRe(r"//.*$", re.S).sub('', line)
1591901f5069SJonathan Corbet        #
1592901f5069SJonathan Corbet        # Soak up the line's worth of prototype text, stopping at { or ; if present.
1593901f5069SJonathan Corbet        #
159404a383ceSMauro Carvalho Chehab        if KernRe(r'\s*#\s*define').match(line):
1595d966dc65SMauro Carvalho Chehab            self.entry.prototype = line
1596901f5069SJonathan Corbet        elif not line.startswith('#'):   # skip other preprocessor stuff
159704a383ceSMauro Carvalho Chehab            r = KernRe(r'([^\{]*)')
1598d966dc65SMauro Carvalho Chehab            if r.match(line):
1599d966dc65SMauro Carvalho Chehab                self.entry.prototype += r.group(1) + " "
1600901f5069SJonathan Corbet        #
1601901f5069SJonathan Corbet        # If we now have the whole prototype, clean it up and declare victory.
1602901f5069SJonathan Corbet        #
160304a383ceSMauro Carvalho Chehab        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
1604362ec251SJonathan Corbet            # strip comments and surrounding spaces
1605901f5069SJonathan Corbet            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
1606901f5069SJonathan Corbet            #
1607d966dc65SMauro Carvalho Chehab            # Handle self.entry.prototypes for function pointers like:
1608d966dc65SMauro Carvalho Chehab            #       int (*pcs_config)(struct foo)
1609901f5069SJonathan Corbet            # by turning it into
1610901f5069SJonathan Corbet            #	    int pcs_config(struct foo)
1611901f5069SJonathan Corbet            #
161204a383ceSMauro Carvalho Chehab            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
1613d966dc65SMauro Carvalho Chehab            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
1614901f5069SJonathan Corbet            #
1615901f5069SJonathan Corbet            # Handle special declaration syntaxes
1616901f5069SJonathan Corbet            #
1617d966dc65SMauro Carvalho Chehab            if 'SYSCALL_DEFINE' in self.entry.prototype:
1618d966dc65SMauro Carvalho Chehab                self.entry.prototype = self.syscall_munge(ln,
1619d966dc65SMauro Carvalho Chehab                                                          self.entry.prototype)
1620901f5069SJonathan Corbet            else:
162104a383ceSMauro Carvalho Chehab                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
1622d966dc65SMauro Carvalho Chehab                if r.search(self.entry.prototype):
1623d966dc65SMauro Carvalho Chehab                    self.entry.prototype = self.tracepoint_munge(ln,
1624d966dc65SMauro Carvalho Chehab                                                                 self.entry.prototype)
1625901f5069SJonathan Corbet            #
1626901f5069SJonathan Corbet            # ... and we're done
1627901f5069SJonathan Corbet            #
1628d966dc65SMauro Carvalho Chehab            self.dump_function(ln, self.entry.prototype)
1629d966dc65SMauro Carvalho Chehab            self.reset_state(ln)
1630d966dc65SMauro Carvalho Chehab
1631d966dc65SMauro Carvalho Chehab    def process_proto_type(self, ln, line):
163250206750SMauro Carvalho Chehab        """
163350206750SMauro Carvalho Chehab        Ancillary routine to process a type.
163450206750SMauro Carvalho Chehab        """
1635d966dc65SMauro Carvalho Chehab
1636362ec251SJonathan Corbet        # Strip C99-style comments and surrounding whitespace
1637362ec251SJonathan Corbet        line = KernRe(r"//.*$", re.S).sub('', line).strip()
16381aeb8099SJonathan Corbet        if not line:
16391aeb8099SJonathan Corbet            return # nothing to see here
1640d966dc65SMauro Carvalho Chehab
1641d966dc65SMauro Carvalho Chehab        # To distinguish preprocessor directive from regular declaration later.
1642d966dc65SMauro Carvalho Chehab        if line.startswith('#'):
1643d966dc65SMauro Carvalho Chehab            line += ";"
16441aeb8099SJonathan Corbet        #
16451aeb8099SJonathan Corbet        # Split the declaration on any of { } or ;, and accumulate pieces
16461aeb8099SJonathan Corbet        # until we hit a semicolon while not inside {brackets}
16471aeb8099SJonathan Corbet        #
16481aeb8099SJonathan Corbet        r = KernRe(r'(.*?)([{};])')
16491aeb8099SJonathan Corbet        for chunk in r.split(line):
16501aeb8099SJonathan Corbet            if chunk:  # Ignore empty matches
16511aeb8099SJonathan Corbet                self.entry.prototype += chunk
16521aeb8099SJonathan Corbet                #
16531aeb8099SJonathan Corbet                # This cries out for a match statement ... someday after we can
16541aeb8099SJonathan Corbet                # drop Python 3.9 ...
16551aeb8099SJonathan Corbet                #
16561aeb8099SJonathan Corbet                if chunk == '{':
16571aeb8099SJonathan Corbet                    self.entry.brcount += 1
16581aeb8099SJonathan Corbet                elif chunk == '}':
16591aeb8099SJonathan Corbet                    self.entry.brcount -= 1
16601aeb8099SJonathan Corbet                elif chunk == ';' and self.entry.brcount <= 0:
1661d966dc65SMauro Carvalho Chehab                    self.dump_declaration(ln, self.entry.prototype)
1662d966dc65SMauro Carvalho Chehab                    self.reset_state(ln)
16631aeb8099SJonathan Corbet                    return
16641aeb8099SJonathan Corbet        #
16651aeb8099SJonathan Corbet        # We hit the end of the line while still in the declaration; put
16661aeb8099SJonathan Corbet        # in a space to represent the newline.
16671aeb8099SJonathan Corbet        #
16681aeb8099SJonathan Corbet        self.entry.prototype += ' '
1669d966dc65SMauro Carvalho Chehab
1670d966dc65SMauro Carvalho Chehab    def process_proto(self, ln, line):
1671d966dc65SMauro Carvalho Chehab        """STATE_PROTO: reading a function/whatever prototype."""
1672d966dc65SMauro Carvalho Chehab
1673d966dc65SMauro Carvalho Chehab        if doc_inline_oneline.search(line):
1674df275526SJonathan Corbet            self.entry.begin_section(ln, doc_inline_oneline.group(1))
1675d6699d5fSJonathan Corbet            self.entry.add_text(doc_inline_oneline.group(2))
1676d6699d5fSJonathan Corbet            self.dump_section()
1677d966dc65SMauro Carvalho Chehab
1678d966dc65SMauro Carvalho Chehab        elif doc_inline_start.search(line):
1679096f73abSJonathan Corbet            self.state = state.INLINE_NAME
1680d966dc65SMauro Carvalho Chehab
1681d966dc65SMauro Carvalho Chehab        elif self.entry.decl_type == 'function':
1682d966dc65SMauro Carvalho Chehab            self.process_proto_function(ln, line)
1683d966dc65SMauro Carvalho Chehab
1684d966dc65SMauro Carvalho Chehab        else:
1685d966dc65SMauro Carvalho Chehab            self.process_proto_type(ln, line)
1686d966dc65SMauro Carvalho Chehab
1687d966dc65SMauro Carvalho Chehab    def process_docblock(self, ln, line):
168850206750SMauro Carvalho Chehab        """STATE_DOCBLOCK: within a ``DOC:`` block."""
1689d966dc65SMauro Carvalho Chehab
1690d966dc65SMauro Carvalho Chehab        if doc_end.search(line):
1691d966dc65SMauro Carvalho Chehab            self.dump_section()
16928d733875SJonathan Corbet            self.output_declaration("doc", self.entry.identifier)
1693d966dc65SMauro Carvalho Chehab            self.reset_state(ln)
1694d966dc65SMauro Carvalho Chehab
1695d966dc65SMauro Carvalho Chehab        elif doc_content.search(line):
1696d6699d5fSJonathan Corbet            self.entry.add_text(doc_content.group(1))
1697d966dc65SMauro Carvalho Chehab
169816740c29SMauro Carvalho Chehab    def parse_export(self):
169916740c29SMauro Carvalho Chehab        """
170050206750SMauro Carvalho Chehab        Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
170116740c29SMauro Carvalho Chehab        """
170216740c29SMauro Carvalho Chehab
170316740c29SMauro Carvalho Chehab        export_table = set()
170416740c29SMauro Carvalho Chehab
170516740c29SMauro Carvalho Chehab        try:
170616740c29SMauro Carvalho Chehab            with open(self.fname, "r", encoding="utf8",
170716740c29SMauro Carvalho Chehab                      errors="backslashreplace") as fp:
170816740c29SMauro Carvalho Chehab
170916740c29SMauro Carvalho Chehab                for line in fp:
171016740c29SMauro Carvalho Chehab                    self.process_export(export_table, line)
171116740c29SMauro Carvalho Chehab
171216740c29SMauro Carvalho Chehab        except IOError:
171316740c29SMauro Carvalho Chehab            return None
171416740c29SMauro Carvalho Chehab
171516740c29SMauro Carvalho Chehab        return export_table
171616740c29SMauro Carvalho Chehab
171750206750SMauro Carvalho Chehab    #: The state/action table telling us which function to invoke in each state.
1718cef8c781SJonathan Corbet    state_actions = {
1719cef8c781SJonathan Corbet        state.NORMAL:			process_normal,
1720cef8c781SJonathan Corbet        state.NAME:			process_name,
1721cef8c781SJonathan Corbet        state.BODY:			process_body,
1722e4153a22SJonathan Corbet        state.DECLARATION:		process_decl,
172374cee0dfSJonathan Corbet        state.SPECIAL_SECTION:		process_special,
1724c7eedb09SJonathan Corbet        state.INLINE_NAME:		process_inline_name,
1725c7eedb09SJonathan Corbet        state.INLINE_TEXT:		process_inline_text,
1726cef8c781SJonathan Corbet        state.PROTO:			process_proto,
1727cef8c781SJonathan Corbet        state.DOCBLOCK:			process_docblock,
1728cef8c781SJonathan Corbet        }
1729cef8c781SJonathan Corbet
173016740c29SMauro Carvalho Chehab    def parse_kdoc(self):
1731d966dc65SMauro Carvalho Chehab        """
1732d966dc65SMauro Carvalho Chehab        Open and process each line of a C source file.
173316740c29SMauro Carvalho Chehab        The parsing is controlled via a state machine, and the line is passed
1734d966dc65SMauro Carvalho Chehab        to a different process function depending on the state. The process
1735d966dc65SMauro Carvalho Chehab        function may update the state as needed.
173616740c29SMauro Carvalho Chehab
173716740c29SMauro Carvalho Chehab        Besides parsing kernel-doc tags, it also parses export symbols.
1738d966dc65SMauro Carvalho Chehab        """
1739d966dc65SMauro Carvalho Chehab
1740d966dc65SMauro Carvalho Chehab        prev = ""
1741d966dc65SMauro Carvalho Chehab        prev_ln = None
174216740c29SMauro Carvalho Chehab        export_table = set()
1743d966dc65SMauro Carvalho Chehab
1744d966dc65SMauro Carvalho Chehab        try:
1745d966dc65SMauro Carvalho Chehab            with open(self.fname, "r", encoding="utf8",
1746d966dc65SMauro Carvalho Chehab                      errors="backslashreplace") as fp:
1747d966dc65SMauro Carvalho Chehab                for ln, line in enumerate(fp):
1748d966dc65SMauro Carvalho Chehab
1749d966dc65SMauro Carvalho Chehab                    line = line.expandtabs().strip("\n")
1750d966dc65SMauro Carvalho Chehab
1751d966dc65SMauro Carvalho Chehab                    # Group continuation lines on prototypes
1752f9cdbc57SMauro Carvalho Chehab                    if self.state == state.PROTO:
1753d966dc65SMauro Carvalho Chehab                        if line.endswith("\\"):
1754e4b2bd90SMauro Carvalho Chehab                            prev += line.rstrip("\\")
1755d966dc65SMauro Carvalho Chehab                            if not prev_ln:
1756d966dc65SMauro Carvalho Chehab                                prev_ln = ln
1757d966dc65SMauro Carvalho Chehab                            continue
1758d966dc65SMauro Carvalho Chehab
1759e8f0303eSJonathan Corbet                        if prev:
1760d966dc65SMauro Carvalho Chehab                            ln = prev_ln
1761d966dc65SMauro Carvalho Chehab                            line = prev + line
1762d966dc65SMauro Carvalho Chehab                            prev = ""
1763d966dc65SMauro Carvalho Chehab                            prev_ln = None
1764d966dc65SMauro Carvalho Chehab
1765096f73abSJonathan Corbet                    self.config.log.debug("%d %s: %s",
1766f9cdbc57SMauro Carvalho Chehab                                          ln, state.name[self.state],
1767d966dc65SMauro Carvalho Chehab                                          line)
1768d966dc65SMauro Carvalho Chehab
176916740c29SMauro Carvalho Chehab                    # This is an optimization over the original script.
177016740c29SMauro Carvalho Chehab                    # There, when export_file was used for the same file,
177116740c29SMauro Carvalho Chehab                    # it was read twice. Here, we use the already-existing
177216740c29SMauro Carvalho Chehab                    # loop to parse exported symbols as well.
177316740c29SMauro Carvalho Chehab                    #
1774473734e0SJonathan Corbet                    if (self.state != state.NORMAL) or \
1775473734e0SJonathan Corbet                       not self.process_export(export_table, line):
1776d966dc65SMauro Carvalho Chehab                        # Hand this line to the appropriate state handler
1777cef8c781SJonathan Corbet                        self.state_actions[self.state](self, ln, line)
1778cef8c781SJonathan Corbet
1779292eca31SMauro Carvalho Chehab            self.emit_unused_warnings()
1780292eca31SMauro Carvalho Chehab
1781d966dc65SMauro Carvalho Chehab        except OSError:
1782d966dc65SMauro Carvalho Chehab            self.config.log.error(f"Error: Cannot open file {self.fname}")
178316740c29SMauro Carvalho Chehab
178416740c29SMauro Carvalho Chehab        return export_table, self.entries
1785