178 lines
3.9 KiB
Python
178 lines
3.9 KiB
Python
"""
|
|
pygments.lexers.tablegen
|
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexer for LLVM's TableGen DSL.
|
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
from pygments.lexer import RegexLexer, include, words, using
|
|
from pygments.lexers.c_cpp import CppLexer
|
|
from pygments.token import Comment, Keyword, Name, Number, Operator, \
|
|
Punctuation, String, Text, Whitespace, Error
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = ['TableGenLexer']
|
|
|
|
# Words that TableGenLexer emits as plain ``Keyword`` tokens.
KEYWORDS = (
    'assert', 'class', 'code', 'def', 'dump', 'else',
    'foreach', 'defm', 'defset', 'defvar', 'field', 'if',
    'in', 'include', 'let', 'multiclass', 'then',
)
|
|
|
|
# Literal constants that TableGenLexer emits as ``Keyword.Constant``.
KEYWORDS_CONST = ('false', 'true')
|
|
# Built-in type names that TableGenLexer emits as ``Keyword.Type``.
KEYWORDS_TYPE = ('bit', 'bits', 'dag', 'int', 'list', 'string')
|
|
|
|
# Operator names recognised after a leading ``!``.  TableGenLexer emits
# ``!<name>`` as ``Operator`` for these and as ``Error`` for any other
# alphabetic name.
BANG_OPERATORS = (
    'add', 'and', 'cast', 'con', 'cond', 'dag', 'div',
    'empty', 'eq', 'exists',
    'filter', 'find', 'foldl', 'foreach',
    'ge', 'getdagarg', 'getdagname', 'getdagop', 'gt',
    'head', 'if', 'interleave', 'isa',
    'le', 'listconcat', 'listremove', 'listsplat', 'logtwo', 'lt',
    'mul', 'ne', 'not', 'or',
    'range', 'repr',
    'setdagarg', 'setdagname', 'setdagop', 'shl', 'size', 'sra', 'srl',
    'strconcat', 'sub', 'subst', 'substr',
    'tail', 'tolower', 'toupper', 'xor',
)
|
|
|
|
class TableGenLexer(RegexLexer):
    """
    Lexer for LLVM's TableGen DSL.

    The token table has one main state (``root``) plus helper states for
    nestable ``/* ... */`` comments, double-quoted strings, and
    ``[{ ... }]`` code blocks.  The contents of a code block are handed to
    :class:`CppLexer`.
    """

    name = 'TableGen'
    url = 'https://llvm.org/docs/TableGen/ProgRef.html'
    aliases = ['tablegen', 'td']
    filenames = ['*.td']

    version_added = '2.19'

    tokens = {
        'root': [
            (r'\s+', Whitespace),

            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*?$', Comment.SingleLine),
            (r'#(define|ifdef|ifndef|else|endif)', Comment.Preproc),

            # Binary/hex numbers. Note that these take priority over names,
            # which may begin with numbers.
            (r'0b[10]+', Number.Bin),
            (r'0x[0-9a-fA-F]+', Number.Hex),

            # Keywords
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
            (words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),

            # Bang operators
            (words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
            # Unknown bang operators are an error
            (r'![a-zA-Z]+', Error),

            # Names and identifiers
            (r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
            (r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),

            # Place numbers after keywords. Names/identifiers may begin with
            # numbers, and we want to parse 1X as one name token as opposed to
            # a number and a name.
            (r'[-\+]?[0-9]+', Number.Integer),

            # String literals
            (r'"', String, 'dqs'),
            (r'\[\{', Text, 'codeblock'),

            # Misc. punctuation.  The run stops in front of a "[{" so that a
            # code block opener glued to other punctuation (e.g. "=[{" or
            # "[[{") is still picked up by the rule above instead of being
            # swallowed into one Punctuation token.
            (r'(?:(?!\[\{)[-+\[\]{}()<>\.,;:=?#])+', Punctuation),
        ],
        # Block comments nest: each inner "/*" pushes another level.
        'comment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ],
        'strings': [
            (r'\\[\\\'"tn]', String.Escape),
            (r'[^\\"]+', String),
        ],
        # Double-quoted string, a la C
        'dqs': [
            (r'"', String, '#pop'),
            include('strings'),
        ],
        # No escaping inside a code block - everything is literal
        # Assume that the code inside a code block is C++. This isn't always
        # true in TableGen, but is the far most common scenario.
        'codeblock': [
            (r'\}\]', Text, '#pop'),
            # Consume everything up to (but not including) the next "}]".
            # - The lookahead keeps a "}" from eating the character after
            #   it, so a block ending in "}}]" is still closed.
            # - The outer "+" (not "*") guarantees a non-empty match; an
            #   empty match at end of input would leave the position
            #   unchanged and make the lexer loop forever on an
            #   unterminated block.
            (r'(?:[^}]+|\}(?!\]))+', using(CppLexer)),
        ],
    }
|