Testing_pikesquares/.venv/lib/python3.12/site-packages/pygments/lexers/tablegen.py
2025-02-02 00:02:31 -05:00

178 lines
3.9 KiB
Python

"""
pygments.lexers.tablegen
~~~~~~~~~~~~~~~~~~~~~~~~
Lexer for LLVM's TableGen DSL.
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pygments.lexer import RegexLexer, include, words, using
from pygments.lexers.c_cpp import CppLexer
from pygments.token import Comment, Keyword, Name, Number, Operator, \
Punctuation, String, Text, Whitespace, Error
__all__ = ['TableGenLexer']
KEYWORDS = (
'assert',
'class',
'code',
'def',
'dump',
'else',
'foreach',
'defm',
'defset',
'defvar',
'field',
'if',
'in',
'include',
'let',
'multiclass',
'then',
)
KEYWORDS_CONST = (
'false',
'true',
)
KEYWORDS_TYPE = (
'bit',
'bits',
'dag',
'int',
'list',
'string',
)
BANG_OPERATORS = (
'add',
'and',
'cast',
'con',
'cond',
'dag',
'div',
'empty',
'eq',
'exists',
'filter',
'find',
'foldl',
'foreach',
'ge',
'getdagarg',
'getdagname',
'getdagop',
'gt',
'head',
'if',
'interleave',
'isa',
'le',
'listconcat',
'listremove',
'listsplat',
'logtwo',
'lt',
'mul',
'ne',
'not',
'or',
'range',
'repr',
'setdagarg',
'setdagname',
'setdagop',
'shl',
'size',
'sra',
'srl',
'strconcat',
'sub',
'subst',
'substr',
'tail',
'tolower',
'toupper',
'xor',
)
class TableGenLexer(RegexLexer):
"""
Lexer for TableGen
"""
name = 'TableGen'
url = 'https://llvm.org/docs/TableGen/ProgRef.html'
aliases = ['tablegen', 'td']
filenames = ['*.td']
version_added = '2.19'
tokens = {
'root': [
(r'\s+', Whitespace),
(r'/\*', Comment.Multiline, 'comment'),
(r'//.*?$', Comment.SingleLine),
(r'#(define|ifdef|ifndef|else|endif)', Comment.Preproc),
# Binary/hex numbers. Note that these take priority over names,
# which may begin with numbers.
(r'0b[10]+', Number.Bin),
(r'0x[0-9a-fA-F]+', Number.Hex),
# Keywords
(words(KEYWORDS, suffix=r'\b'), Keyword),
(words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
(words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),
# Bang operators
(words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
# Unknown bang operators are an error
(r'![a-zA-Z]+', Error),
# Names and identifiers
(r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
(r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
# Place numbers after keywords. Names/identifiers may begin with
# numbers, and we want to parse 1X as one name token as opposed to
# a number and a name.
(r'[-\+]?[0-9]+', Number.Integer),
# String literals
(r'"', String, 'dqs'),
(r'\[\{', Text, 'codeblock'),
# Misc. punctuation
(r'[-+\[\]{}()<>\.,;:=?#]+', Punctuation),
],
'comment': [
(r'[^*/]+', Comment.Multiline),
(r'/\*', Comment.Multiline, '#push'),
(r'\*/', Comment.Multiline, '#pop'),
(r'[*/]', Comment.Multiline)
],
'strings': [
(r'\\[\\\'"tn]', String.Escape),
(r'[^\\"]+', String),
],
# Double-quoted string, a la C
'dqs': [
(r'"', String, '#pop'),
include('strings'),
],
# No escaping inside a code block - everything is literal
# Assume that the code inside a code block is C++. This isn't always
# true in TableGen, but is the far most common scenario.
'codeblock': [
(r'\}\]', Text, '#pop'),
(r'([^}]+|\}[^]])*', using(CppLexer)),
],
}