"""
    pygments.lexers.tablegen
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for LLVM's TableGen DSL.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer, include, words, using
from pygments.lexers.c_cpp import CppLexer
from pygments.token import Comment, Keyword, Name, Number, Operator, \
    Punctuation, String, Text, Whitespace, Error

# Public names exported by this module.
__all__ = ['TableGenLexer']

# Words that TableGenLexer tags with the generic ``Keyword`` token.
KEYWORDS = (
    'assert', 'class', 'code', 'def', 'dump',
    'else', 'foreach', 'defm', 'defset', 'defvar',
    'field', 'if', 'in', 'include', 'let',
    'multiclass', 'then',
)

# Boolean literals, tagged with ``Keyword.Constant`` by TableGenLexer.
KEYWORDS_CONST = ('false', 'true')
# Type names, tagged with ``Keyword.Type`` by TableGenLexer.
KEYWORDS_TYPE = (
    'bit', 'bits', 'dag',
    'int', 'list', 'string',
)

# Operator names recognised after a leading ``!``. The ``!`` itself is not
# part of these strings; TableGenLexer adds it as a regex prefix.
BANG_OPERATORS = (
    'add', 'and', 'cast', 'con', 'cond',
    'dag', 'div', 'empty', 'eq', 'exists',
    'filter', 'find', 'foldl', 'foreach', 'ge',
    'getdagarg', 'getdagname', 'getdagop', 'gt', 'head',
    'if', 'interleave', 'isa', 'le', 'listconcat',
    'listremove', 'listsplat', 'logtwo', 'lt', 'mul',
    'ne', 'not', 'or', 'range', 'repr',
    'setdagarg', 'setdagname', 'setdagop', 'shl', 'size',
    'sra', 'srl', 'strconcat', 'sub', 'subst',
    'substr', 'tail', 'tolower', 'toupper', 'xor',
)

class TableGenLexer(RegexLexer):
    """
    Lexer for LLVM's TableGen DSL.

    Recognises comments (``/* */`` comments may nest), preprocessor
    directives, keywords, bang operators, identifiers, numbers,
    double-quoted strings and ``[{ ... }]`` code blocks. The contents of a
    code block are handed to the C++ lexer.
    """

    name = 'TableGen'
    url = 'https://llvm.org/docs/TableGen/ProgRef.html'
    aliases = ['tablegen', 'td']
    filenames = ['*.td']

    version_added = '2.19'

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            (r'/\*', Comment.Multiline, 'comment'),
            # Comment.Single is the standard token for line comments.
            (r'//.*?$', Comment.Single),
            (r'#(define|ifdef|ifndef|else|endif)', Comment.Preproc),

            # Binary/hex numbers. Note that these take priority over names,
            # which may begin with numbers.
            (r'0b[10]+', Number.Bin),
            (r'0x[0-9a-fA-F]+', Number.Hex),

            # Keywords
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
            (words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),

            # Bang operators
            (words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
            # Unknown bang operators are an error
            (r'![a-zA-Z]+', Error),

            # Names and identifiers
            (r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
            (r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),

            # Place numbers after keywords. Names/identifiers may begin with
            # numbers, and we want to parse 1X as one name token as opposed to
            # a number and a name.
            (r'[-\+]?[0-9]+', Number.Integer),

            # String literals
            (r'"', String, 'dqs'),
            (r'\[\{', Text, 'codeblock'),

            # Misc. punctuation. The negative lookahead stops a punctuation
            # run right before "[{", so a code block opener that directly
            # follows other punctuation (e.g. "=[{") is still picked up by
            # the code block rule above instead of being swallowed here.
            (r'(?:(?!\[\{)[-+\[\]{}()<>\.,;:=?#])+', Punctuation),
        ],
        # Block comment body; "/*" pushes another level so comments can nest.
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
        'strings': [
            (r'\\[\\\'"tn]', String.Escape),
            (r'[^\\"]+', String),
        ],
        # Double-quoted string, a la C
        'dqs': [
            (r'"', String, '#pop'),
            include('strings'),
        ],
        # No escaping inside a code block - everything is literal
        # Assume that the code inside a code block is C++. This isn't always
        # true in TableGen, but is the far most common scenario.
        'codeblock': [
            (r'\}\]', Text, '#pop'),
            # Consume everything up to, but not including, the closing "}]".
            # A "}" is only consumed when it is not followed by "]"; using a
            # lookahead (rather than consuming the next character) keeps
            # "}}]" from hiding the terminator. The outer "+" guarantees a
            # non-empty match: an empty match with no state change would
            # leave the lexer stuck at the same position (e.g. at the end of
            # an unterminated code block).
            (r'(?:[^}]+|\}(?!\]))+', using(CppLexer)),
        ],
    }