fixed subscription table
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
# Names exported by this package; each one is imported from a sibling
# module further down in this file.
__all__ = (
    "StateBlock",
    "paragraph",
    "heading",
    "lheading",
    "code",
    "fence",
    "hr",
    "list_block",
    "reference",
    "blockquote",
    "html_block",
    "table",
)
|
||||
|
||||
from .blockquote import blockquote
|
||||
from .code import code
|
||||
from .fence import fence
|
||||
from .heading import heading
|
||||
from .hr import hr
|
||||
from .html_block import html_block
|
||||
from .lheading import lheading
|
||||
from .list import list_block
|
||||
from .paragraph import paragraph
|
||||
from .reference import reference
|
||||
from .state_block import StateBlock
|
||||
from .table import table
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,299 @@
|
||||
# Block quotes
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _snapshot_line(state: StateBlock, line: int) -> tuple[int, int, int, int]:
    """Return the ``(bMarks, bsCount, sCount, tShift)`` entries of ``line``."""
    return (
        state.bMarks[line],
        state.bsCount[line],
        state.sCount[line],
        state.tShift[line],
    )


def _strip_quote_marker(
    state: StateBlock, line: int, pos: int, maximum: int
) -> bool:
    """Re-base the offsets of ``line`` so that it starts after its ``>`` marker.

    :param state: block state whose per-line arrays are updated in place
    :param line: index of the line being re-based
    :param pos: position in ``state.src`` right after the ``>`` character
    :param maximum: end mark (``state.eMarks``) of the line
    :return: True when nothing but whitespace follows the marker

    ``bMarks``, ``bsCount``, ``sCount`` and ``tShift`` of ``line`` are
    overwritten; callers are expected to snapshot them first.
    """
    # set offset past spaces and ">"
    initial = offset = state.sCount[line] + 1

    try:
        next_char: str | None = state.src[pos]
    except IndexError:
        next_char = None

    # Always initialised, so the tab arithmetic below can never read an
    # unbound (or stale, from a previous line) value.
    adjust_tab = False

    # skip one optional space after '>'
    if next_char == " ":
        # ' >   test '
        #     ^ -- position start of line here:
        pos += 1
        initial += 1
        offset += 1
        space_after_marker = True
    elif next_char == "\t":
        space_after_marker = True
        if (state.bsCount[line] + offset) % 4 == 3:
            # '  >\t  test '
            #       ^ -- position start of line here (tab has width==1)
            pos += 1
            initial += 1
            offset += 1
        else:
            # ' >\t  test '
            #    ^ -- position start of line here + shift bsCount slightly
            #         to make extra space appear
            adjust_tab = True
    else:
        space_after_marker = False

    state.bMarks[line] = pos

    # Measure the indentation that follows the marker.  bsCount still holds
    # its pre-blockquote value here; it is replaced only after the scan.
    while pos < maximum:
        ch = state.src[pos]
        if not isStrSpace(ch):
            break
        if ch == "\t":
            offset += 4 - (offset + state.bsCount[line] + (1 if adjust_tab else 0)) % 4
        else:
            offset += 1
        pos += 1

    state.bsCount[line] = state.sCount[line] + 1 + (1 if space_after_marker else 0)
    state.sCount[line] = offset - initial
    state.tShift[line] = pos - state.bMarks[line]

    return pos >= maximum


def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a ``>`` block quote starting at ``startLine``.

    :param state: block parser state; its line arrays are temporarily
        rewritten while the quote content is tokenized and restored afterwards
    :param startLine: index of the line to test
    :param endLine: index one past the last line that may be consumed
    :param silent: validation mode - only report whether a block quote starts
        here, without pushing tokens or touching the state
    :return: True if ``startLine`` opens a block quote
    """
    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # check the block quote marker
    try:
        if state.src[pos] != ">":
            return False
    except IndexError:
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    # Original (bMarks, bsCount, sCount, tShift) of every line touched below,
    # stored consecutively starting at startLine so they can be restored.
    saved = [_snapshot_line(state, startLine)]
    lastLineEmpty = _strip_quote_marker(state, startLine, pos, maximum)

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    #  1. an empty line outside:
    #     ```
    #     > test
    #
    #     ```
    #  2. an empty line inside:
    #     ```
    #     >
    #     test
    #     ```
    #  3. another tag:
    #     ```
    #     > test
    #      - - -
    #     ```
    nextLine = startLine + 1
    while nextLine < endLine:
        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        #    > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos >= maximum:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        if state.src[pos] == ">" and not isOutdented:
            # This line is inside the blockquote.
            saved.append(_snapshot_line(state, nextLine))
            lastLineEmpty = _strip_quote_marker(state, nextLine, pos + 1, maximum)
            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                saved.append(_snapshot_line(state, nextLine))
                state.sCount[nextLine] -= state.blkIndent

            break

        saved.append(_snapshot_line(state, nextLine))

        # A negative indentation means that this is a paragraph continuation
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore the original line offsets; this might not be necessary since the
    # parser has already been here, but just to make sure we can do that.
    for line, (bmark, bscount, scount, tshift) in enumerate(saved, start=startLine):
        state.bMarks[line] = bmark
        state.tShift[line] = tshift
        state.sCount[line] = scount
        state.bsCount[line] = bscount

    state.blkIndent = oldIndent

    return True
|
@@ -0,0 +1,35 @@
|
||||
"""Code block (4 spaces padded)."""
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse an indented code block starting at ``startLine``.

    :param state: block parser state; ``state.line`` is advanced past the block
    :param startLine: index of the line to test
    :param endLine: index one past the last line that may be consumed
    :param silent: only logged; this rule does not branch on it
    :return: True if a ``code_block`` token was pushed
    """
    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    if not state.is_code_block(startLine):
        return False

    # `block_end` is one past the last code line seen so far.  Empty lines are
    # stepped over but only extend the block if more code follows them.
    block_end = cursor = startLine + 1
    while cursor < endLine:
        if state.isEmpty(cursor):
            cursor += 1
        elif state.is_code_block(cursor):
            cursor += 1
            block_end = cursor
        else:
            break

    state.line = block_end

    token = state.push("code_block", "code", 0)
    token.content = (
        state.getLines(startLine, block_end, 4 + state.blkIndent, False) + "\n"
    )
    token.map = [startLine, state.line]

    return True
|
@@ -0,0 +1,101 @@
|
||||
# fences (``` lang, ~~~ lang)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a fenced code block (``` or ~~~) starting at ``startLine``.

    :param state: block parser state; ``state.line`` is advanced past the block
    :param startLine: index of the line to test
    :param endLine: index one past the last line that may be consumed
    :param silent: validation mode - return as soon as a valid opening fence is
        recognised, without pushing a token
    :return: True if ``startLine`` opens a fence
    """
    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    fence_start = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # an opening fence needs at least three characters
    if fence_start + 3 > line_end:
        return False

    marker = state.src[fence_start]
    if marker != "~" and marker != "`":
        return False

    # scan marker length
    fence_end = state.skipCharsStr(fence_start, marker)
    fence_len = fence_end - fence_start
    if fence_len < 3:
        return False

    markup = state.src[fence_start:fence_end]
    params = state.src[fence_end:line_end]

    # a backtick fence may not have backticks in the rest of its opening line
    if marker == "`" and marker in params:
        return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block; running out of lines auto-closes the fence
    # (end of document or end of parent)
    haveEndMarker = False
    nextLine = startLine + 1
    while nextLine < endLine:
        pos = line_start = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        try:
            first_char = state.src[pos]
        except IndexError:
            break

        if first_char == marker and not state.is_code_block(nextLine):
            pos = state.skipCharsStr(pos, marker)
            # closing code fence must be at least as long as the opening one
            # and may be followed by spaces only
            if pos - line_start >= fence_len and state.skipSpaces(pos) >= maximum:
                haveEndMarker = True
                break

        nextLine += 1

    # If a fence has heading spaces, they should be removed from its inner block
    indent = state.sCount[startLine]

    state.line = nextLine + (1 if haveEndMarker else 0)

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, indent, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True
|
@@ -0,0 +1,68 @@
|
||||
"""ATX heading (#, ##, ...)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _char_at(src: str, pos: int) -> str | None:
    """Return ``src[pos]``, or ``None`` when ``pos`` is past the end of ``src``."""
    try:
        return src[pos]
    except IndexError:
        return None


def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse an ATX heading (``#`` to ``######``) at ``startLine``.

    :param state: block parser state; ``state.line`` is advanced by one line
    :param startLine: index of the line to test
    :param endLine: only logged; a heading always spans a single line
    :param silent: validation mode - report a match without pushing tokens
    :return: True if ``startLine`` is a heading
    """
    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # Guarded read: a start position at the very end of the source must be
    # rejected, not raise IndexError (the sibling rules guard this as well).
    ch = _char_at(state.src, pos)

    if ch != "#" or pos >= maximum:
        return False

    # count heading level
    level = 1
    pos += 1
    ch = _char_at(state.src, pos)
    while ch == "#" and pos < maximum and level <= 6:
        level += 1
        pos += 1
        ch = _char_at(state.src, pos)

    # more than six '#', or the run of '#' is not followed by whitespace / EOL
    if level > 6 or (pos < maximum and not isStrSpace(ch)):
        return False

    if silent:
        return True

    # Let's cut tails like '    ###  ' from the end of string
    maximum = state.skipSpacesBack(maximum, pos)
    tmp = state.skipCharsStrBack(maximum, "#", pos)
    # the closing '#' run only counts when it is preceded by whitespace
    if tmp > pos and isStrSpace(state.src[tmp - 1]):
        maximum = tmp

    state.line = startLine + 1

    tag = "h" + str(level)
    markup = "#" * level

    token = state.push("heading_open", tag, 1)
    token.markup = markup
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[pos:maximum].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", tag, -1)
    token.markup = markup

    return True
|
@@ -0,0 +1,55 @@
|
||||
"""Horizontal rule
|
||||
|
||||
At least 3 of these characters on a line * - _
|
||||
"""
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a horizontal rule (``***``, ``---`` or ``___``).

    :param state: block parser state; ``state.line`` is advanced by one line
    :param startLine: index of the line to test
    :param endLine: only logged; a rule always spans a single line
    :param silent: validation mode - report a match without pushing a token
    :return: True if ``startLine`` is a horizontal rule
    """
    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    try:
        marker = state.src[pos]
    except IndexError:
        return False
    pos += 1

    # Check hr marker
    if marker not in ("*", "-", "_"):
        return False

    # markers can be mixed with spaces, but there should be at least 3 of them
    cnt = 1
    while pos < maximum:
        ch = state.src[pos]
        pos += 1
        if ch == marker:
            cnt += 1
        elif not isStrSpace(ch):
            return False

    if cnt < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    # Exactly the counted markers.  The previous `cnt + 1` was a literal port
    # of JavaScript's `Array(cnt + 1).join(ch)`, which yields `cnt` copies,
    # so it recorded one marker too many.
    token.markup = marker * cnt

    return True
|
@@ -0,0 +1,90 @@
|
||||
# HTML block
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..common.html_blocks import block_names
|
||||
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
#
# Each entry is (opening pattern, closing pattern, can-terminate-paragraph).
# The order of the entries is significant to any consumer that stops at the
# first matching opening pattern.
HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
    (
        # <script, <pre, <style or <textarea (any case) followed by
        # whitespace, ">" or end of line; closed by any of the four end tags
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # "<!--" ... "-->"
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # "<?" ... "?>"
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # "<!" + uppercase ASCII letter ... ">"
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # "<![CDATA[" ... "]]>"
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    (
        # opening or closing tag whose name is one of `block_names` (any
        # case); the closing pattern "^$" only matches an empty string
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # HTML_OPEN_CLOSE_TAG_STR followed only by whitespace up to the end of the
    # text (presumably a single complete tag - see common.html_re); the only
    # entry that cannot terminate a paragraph
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
|
||||
|
||||
|
||||
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a raw HTML block starting at ``startLine``.

    The rule is inactive unless the ``html`` option of the parser is truthy.

    :param state: block parser state; ``state.line`` is advanced past the block
    :param startLine: index of the line to test
    :param endLine: index one past the last line that may be consumed
    :param silent: validation mode - return whether the matched sequence is
        allowed to terminate a paragraph, without pushing a token
    :return: True if an ``html_block`` token was pushed (or, in silent mode,
        the terminator flag of the matched sequence)
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    if not state.md.options.get("html", None):
        return False

    # Guarded read: a start position at the very end of the source must be
    # rejected, not raise IndexError (the sibling rules guard this as well).
    try:
        if state.src[pos] != "<":
            return False
    except IndexError:
        return False

    lineText = state.src[pos:maximum]

    # the first sequence whose opening pattern matches wins
    html_seq = None
    for HTML_SEQUENCE in HTML_SEQUENCES:
        if HTML_SEQUENCE[0].search(lineText):
            html_seq = HTML_SEQUENCE
            break

    if html_seq is None:
        return False

    _, close_re, can_terminate = html_seq

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return can_terminate

    nextLine = startLine + 1

    # If we are here - we detected HTML block.
    # Let's roll down till block end.
    if not close_re.search(lineText):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]
            lineText = state.src[pos:maximum]

            if close_re.search(lineText):
                # a non-empty closing line belongs to the block, an empty
                # one (the "^$" closers) does not
                if len(lineText) != 0:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True
|
@@ -0,0 +1,86 @@
|
||||
# lheading (---, ==)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a setext heading (text underlined with ``=`` or ``-``).

    :param state: block parser state; ``state.line`` is advanced past the
        underline when a heading is found
    :param startLine: index of the first line of the candidate heading text
    :param endLine: index one past the last line that may be consumed
    :param silent: only logged; this rule does not branch on it
    :return: True if heading tokens were pushed, False if no underline was found
    """
    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    level = None
    nextLine = startLine + 1
    ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")

    if state.is_code_block(startLine):
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    # jump line-by-line until empty one or EOF
    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # Check for underline in setext header
        if state.sCount[nextLine] >= state.blkIndent:
            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]

            if pos < maximum:
                marker = state.src[pos]

                if marker in ("-", "="):
                    pos = state.skipCharsStr(pos, marker)
                    pos = state.skipSpaces(pos)

                    # the line holds only markers and trailing spaces
                    if pos >= maximum:
                        level = 1 if marker == "=" else 2
                        break

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break

        nextLine += 1

    if not level:
        # Didn't find valid underline.  Undo the temporary parent type so a
        # failed attempt does not leak "paragraph" into the rules run next.
        state.parentType = oldParentType
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = marker
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    # the inline content excludes the underline line
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = marker

    state.parentType = oldParentType

    return True
|
@@ -0,0 +1,345 @@
|
||||
# Lists
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
    """Search ``[-+*][\\n ]`` at the start of ``startLine``.

    :return: the position right after the marker on success, ``-1`` on failure
    """
    marker_pos = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    try:
        marker = state.src[marker_pos]
    except IndexError:
        return -1

    if marker not in ("*", "-", "+"):
        return -1

    after_marker = marker_pos + 1

    # " -test " - is not a list item: the marker must be followed by
    # whitespace or by the end of the line
    if after_marker < line_end and not isStrSpace(state.src[after_marker]):
        return -1

    return after_marker
|
||||
|
||||
|
||||
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
    """Search ``\\d+[.)][\\n ]`` at the start of ``startLine``.

    :return: the position right after the marker on success, ``-1`` on failure
    """
    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if start + 1 >= maximum:
        return -1

    # the marker must begin with an ASCII digit
    if not "0" <= state.src[start] <= "9":
        return -1

    pos = start + 1
    while pos < maximum and "0" <= state.src[pos] <= "9":
        pos += 1
        # List marker should have no more than 9 digits
        # (prevents integer overflow in browsers)
        if pos - start >= 10:
            return -1

    # EOL -> fail; otherwise the digits must be closed by ")" or "."
    if pos >= maximum or state.src[pos] not in (")", "."):
        return -1
    pos += 1

    # " 1.test " - is not a list item: the marker must be followed by
    # whitespace or by the end of the line
    if pos < maximum and not isStrSpace(state.src[pos]):
        return -1

    return pos
|
||||
|
||||
|
||||
def markTightParagraphs(state: StateBlock, idx: int) -> None:
    """Hide the paragraph wrappers that sit directly inside the items of a list.

    :param state: block state holding the token stream
    :param idx: index in ``state.tokens`` of the token that opens the list

    Every ``paragraph_open`` token found two nesting levels below
    ``state.level`` is flagged ``hidden`` together with the token two
    positions after it.
    """
    tokens = state.tokens
    paragraph_level = state.level + 2
    stop = len(tokens) - 2

    i = idx + 2
    while i < stop:
        opener = tokens[i]
        if opener.level == paragraph_level and opener.type == "paragraph_open":
            tokens[i + 2].hidden = True
            opener.hidden = True
            # jump over the three tokens of the paragraph just handled
            i += 3
        else:
            i += 1
|
||||
|
||||
|
||||
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a bullet or ordered list starting at ``startLine``.

    :param state: block parser state; ``state.line`` is advanced past the list
    :param startLine: index of the line to test
    :param endLine: index one past the last line that may be consumed
    :param silent: validation mode - only report whether a list may start
        here (with stricter conditions when it would interrupt a paragraph)
    :return: True if ``startLine`` opens a list
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    if state.is_code_block(startLine):
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    isTerminatingParagraph = bool(
        silent
        and state.parentType == "paragraph"
        and state.sCount[startLine] >= state.blkIndent
    )

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    isOrdered = posAfterMarker >= 0
    if isOrdered:
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker < 0:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        isTerminatingParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False

    # We should terminate list on style change. Remember first one to compare.
    markerChar = state.src[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}
    else:
        token = state.push("bullet_list_open", "ul", 1)

    token.map = listLines = [startLine, 0]
    token.markup = markerChar

    #
    # Iterate list items
    #

    tight = True
    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # measure the whitespace between the marker and the item content
        while pos < maximum:
            ch = state.src[pos]

            if ch == "\t":
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == " ":
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        # trimming space in "-    \n  3" case, indent is 1 here
        indentAfterMarker = 1 if contentStart >= maximum else offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # The list becomes loose as soon as one item is not tight, or the
        # previous item ended with an empty line.
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar

        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        if state.is_code_block(startLine):
            break

        # fail if terminating block found
        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        if markerChar != state.src[posAfterMarker - 1]:
            break

    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = markerChar

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
|
@@ -0,0 +1,65 @@
|
||||
"""Paragraph."""
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule that consumes a paragraph starting at ``startLine``.

    The paragraph extends line by line until an empty line, the end of the
    document (``state.lineMax``), or a line on which one of the "paragraph"
    terminator rules matches. The collected text is emitted as
    ``paragraph_open`` / ``inline`` / ``paragraph_close`` tokens and
    ``state.line`` is advanced past the paragraph.

    :param state: block parser state; tokens are pushed onto it
    :param startLine: index of the first line of the paragraph
    :param endLine: only logged; the scan always runs up to ``state.lineMax``
    :param silent: only logged; this rule always produces tokens
    :return: always ``True``
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    terminators = state.md.block.ruler.getRules("paragraph")
    endLine = state.lineMax

    # terminator rules inspect parentType while we scan, so set it for the
    # duration of the rule and put the previous value back before returning
    savedParentType = state.parentType
    state.parentType = "paragraph"

    # jump line-by-line until empty one or EOF
    lineNo = startLine + 1
    while lineNo < endLine and not state.isEmpty(lineNo):
        indent = state.sCount[lineNo]

        # A line indented by more than 3 would be a code block normally, but
        # after a paragraph it's a lazy continuation regardless of what's
        # there. A negative indent is a quirk for blockquotes: that line
        # should already have been checked by the blockquote rule.
        isLazyContinuation = indent - state.blkIndent > 3 or indent < 0

        # Some tags can terminate paragraph without empty line.
        if not isLazyContinuation and any(
            rule(state, lineNo, endLine, True) for rule in terminators
        ):
            break

        lineNo += 1

    content = state.getLines(startLine, lineNo, state.blkIndent, False).strip()
    state.line = lineNo
    lineMap = [startLine, lineNo]

    openToken = state.push("paragraph_open", "p", 1)
    openToken.map = list(lineMap)

    inlineToken = state.push("inline", "", 0)
    inlineToken.content = content
    inlineToken.map = list(lineMap)
    inlineToken.children = []

    state.push("paragraph_close", "p", -1)

    state.parentType = savedParentType

    return True
|
@@ -0,0 +1,215 @@
|
||||
import logging
|
||||
|
||||
from ..common.utils import charCodeAt, isSpace, normalizeReference
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    """Block rule that parses a link reference definition.

    Recognises ``[label]: destination 'title'`` starting at ``startLine``
    (the definition may span several lines) and records it in
    ``state.env["references"]`` under the normalized label. A definition
    whose label is already present is appended to
    ``state.env["duplicate_refs"]`` instead. When the ``inline_definitions``
    option is set, a ``definition`` token is pushed as well.

    :param state: block parser state
    :param startLine: index of the line the definition starts on
    :param _endLine: only logged; the scan runs up to ``state.lineMax``
    :param silent: if true, only validate; do not modify ``state.env``,
        ``state.line`` or the token stream
    :return: ``True`` if a valid definition was found, else ``False``
    """
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    lines = 0
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    if state.is_code_block(startLine):
        return False

    if state.src[pos] != "[":
        return False

    # Simple check to quickly interrupt scan on [link](url) at the start of line.
    # Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
    while pos < maximum:
        # /* ] */  /* \ */  /* : */
        if state.src[pos] == "]" and state.src[pos - 1] != "\\":
            if pos + 1 == maximum:
                return False
            if state.src[pos + 1] != ":":
                return False
            break
        pos += 1

    endLine = state.lineMax

    # jump line-by-line until empty one or EOF
    terminatorRules = state.md.block.ruler.getRules("reference")

    oldParentType = state.parentType
    state.parentType = "reference"

    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        nextLine += 1

    # Only the terminator rules above can observe parentType, so restore it
    # here: every exit below (including the early `return False` paths and
    # the silent `return True`) then leaves the shared state untouched.
    state.parentType = oldParentType

    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    # find the end of the label, counting the newlines it spans
    labelEnd = None
    pos = 1
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            lines += 1
        elif ch == 0x5C:  # /* \ */
            # skip the escaped character, but still count an escaped newline
            pos += 1
            if pos < maximum and charCodeAt(string, pos) == 0x0A:
                lines += 1
        pos += 1

    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not res.ok:
        return False

    href = state.md.normalizeLink(res.str)
    if not state.md.validateLink(href):
        return False

    pos = res.pos
    lines += res.lines

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = lines

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
    if pos < maximum and start != pos and res.ok:
        title = res.str
        pos = res.pos
        lines += res.lines
    else:
        title = ""
        pos = destEndPos
        lines = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
        # garbage at the end of the line after title,
        # but it could still be a valid reference if we roll back
        title = ""
        pos = destEndPos
        lines = destEndLineNo
        while pos < maximum:
            ch = charCodeAt(string, pos)
            if not isSpace(ch):
                break
            pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = startLine + lines + 1

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    if label not in state.env["references"]:
        # the first definition of a label wins
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    return True
|
@@ -0,0 +1,261 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from ..ruler import StateBase
|
||||
from ..token import Token
|
||||
from ..utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it.main import MarkdownIt
|
||||
|
||||
|
||||
class StateBlock(StateBase):
    """Mutable state shared by the block-level parsing rules.

    Besides the source text, parser instance, environment and output token
    list, it holds per-line caches (``bMarks``, ``eMarks``, ``tShift``,
    ``sCount``, ``bsCount``) that are computed once in ``__init__``, plus
    helpers for scanning the source and cutting line ranges out of it.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        """Store the inputs and build the per-line marker caches from ``src``."""
        self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks: list[int] = []  # line begin offsets for fast jumps
        self.eMarks: list[int] = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: list[int] = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        srcLength = len(self.src)
        lastIndex = srcLength - 1
        lineStart = 0  # offset of the first character of the current line
        indentChars = 0  # leading space/tab characters seen on the current line
        indentWidth = 0  # the same indent measured in columns (tabs expanded)
        inIndent = True  # still inside the leading whitespace of the line

        for index, char in enumerate(self.src):
            if inIndent and isStrSpace(char):
                indentChars += 1
                # a tab advances to the next multiple of 4 columns
                indentWidth += (4 - indentWidth % 4) if char == "\t" else 1
                continue
            inIndent = False

            isNewline = char == "\n"
            if not (isNewline or index == lastIndex):
                continue

            # A line ends at its newline, or one past the final character
            # when the source does not end with a newline.
            lineEnd = index if isNewline else index + 1
            self.bMarks.append(lineStart)
            self.eMarks.append(lineEnd)
            self.tShift.append(indentChars)
            self.sCount.append(indentWidth)
            self.bsCount.append(0)

            inIndent = True
            indentChars = 0
            indentWidth = 0
            lineStart = lineEnd + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(srcLength)
        self.eMarks.append(srcLength)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

        # pre-check if code blocks are enabled, to speed up is_code_block method
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        """Return a short summary: class name, current line, level and token count."""
        name = self.__class__.__name__
        return f"{name}(line={self.line},level={self.level},tokens={len(self.tokens)})"

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Create a block token, append it to ``self.tokens`` and return it.

        A closing token (``nesting < 0``) lowers ``self.level`` before the
        token's level is recorded; an opening token (``nesting > 0``) raises
        it afterwards, so an open/close pair ends up on the same level.
        """
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True if ``line`` has nothing after its leading whitespace."""
        contentStart = self.bMarks[line] + self.tShift[line]
        return contentStart >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first non-empty line at or after ``from_pos``.

        Returns ``self.lineMax`` (or ``from_pos`` itself if it is already at
        or beyond it) when no such line exists. A line whose markers cannot
        be looked up is treated as empty.
        """
        line = from_pos
        while line < self.lineMax:
            try:
                hasContent = (self.bMarks[line] + self.tShift[line]) < self.eMarks[
                    line
                ]
            except IndexError:
                hasContent = False
            if hasContent:
                return line
            line += 1
        return line

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position."""
        while True:
            try:
                char = self.src[pos]
            except IndexError:
                # ran off the end of the source
                return pos
            if not isStrSpace(char):
                return pos
            pos += 1

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse."""
        while pos > minimum:
            pos -= 1
            if not isStrSpace(self.src[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip character code from given position."""
        while True:
            try:
                current = self.srcCharCode[pos]
            except IndexError:
                return pos
            if current != code:
                return pos
            pos += 1

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Skip character string from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                return pos
            if current != ch:
                return pos
            pos += 1

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip character code reverse from given position - 1."""
        while pos > minimum:
            pos -= 1
            if self.srcCharCode[pos] != code:
                return pos + 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Skip character string reverse from given position - 1."""
        while pos > minimum:
            pos -= 1
            if self.src[pos] != ch:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source.

        Returns the text of lines ``begin`` (inclusive) to ``end``
        (exclusive) with up to ``indent`` columns of leading indentation
        removed from each line. The line terminator of the final line is
        kept only when ``keepLastLF`` is true.
        """
        if begin >= end:
            return ""

        pieces: list[str] = []
        for line in range(begin, end):
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            if line + 1 < end or keepLastLF:
                # include the character that follows the end marker
                last = self.eMarks[line] + 1
            else:
                last = self.eMarks[line]

            # consume leading characters until `indent` columns are removed
            while first < last and lineIndent < indent:
                ch = self.src[first]
                if isStrSpace(ch):
                    if ch == "\t":
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # non-space character that still lies inside the
                    # recorded indent of this line counts as one column
                    lineIndent += 1
                else:
                    break
                first += 1

            text = self.src[first:last]
            if lineIndent > indent:
                # A tab overshot the requested indent (e.g. in code
                # blocks); give the surplus columns back as spaces.
                text = (" " * (lineIndent - indent)) + text
            pieces.append(text)

        return "".join(pieces)

    def is_code_block(self, line: int) -> bool:
        """Check if line is a code block,
        i.e. the code block rule is enabled and text is indented by more than 3 spaces.
        """
        extraIndent = self.sCount[line] - self.blkIndent
        return self._code_enabled and extraIndent >= 4
|
@@ -0,0 +1,236 @@
|
||||
# GFM table, https://github.github.com/gfm/#tables-extension-
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from ..common.utils import charStrAt, isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
# Matches one cell of the delimiter row: one or more dashes with an optional
# leading and/or trailing colon (e.g. "---", ":--", "--:", ":-:").
headerLineRe = re.compile(r"^:?-+:?$")
# Matches a single pipe at the very start or at the very end of a string.
# NOTE(review): not referenced by the functions visible in this module —
# confirm there are no external users before removing.
enclosingPipesRe = re.compile(r"^\||\|$")
|
||||
|
||||
|
||||
def getLine(state: StateBlock, line: int) -> str:
    """Return the text of ``line`` without its leading indentation.

    The slice starts at the line's begin marker plus its ``tShift`` offset
    and stops at the line's end marker.
    """
    contentStart = state.bMarks[line] + state.tShift[line]
    return state.src[contentStart : state.eMarks[line]]
|
||||
|
||||
|
||||
def escapedSplit(string: str) -> list[str]:
    """Split a table row into cells on unescaped ``|`` characters.

    A pipe directly preceded by a backslash does not separate cells; the
    backslash is dropped and the pipe is kept as part of the cell text.
    Leading and trailing pipes produce empty first/last cells, which callers
    are expected to strip themselves.

    :param string: the text of one table row
    :return: the list of raw (unstripped) cell strings; never empty
    """
    result: list[str] = []
    current = ""  # cell text collected so far, up to the last escaped pipe
    lastPos = 0  # start of the not-yet-copied part of the current cell
    isEscaped = False  # True when the previous character was a backslash

    for pos, ch in enumerate(string):
        if ch == "|":
            if isEscaped:
                # escaped pipe, '\|': copy everything before the backslash
                # and restart at the pipe so it stays in the cell
                current += string[lastPos : pos - 1]
                lastPos = pos
            else:
                # pipe separating cells, '|'
                result.append(current + string[lastPos:pos])
                current = ""
                lastPos = pos + 1
        isEscaped = ch == "\\"

    result.append(current + string[lastPos:])

    return result
|
||||
|
||||
|
||||
def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule that parses a GFM table starting at ``startLine``.

    ``startLine`` must be the header row and the following line the
    delimiter row (cells made of dashes with optional colons for alignment).
    Both rows must have the same number of columns. Subsequent lines are
    taken as body rows until a blank line, a less-indented line, an indented
    code line or a line matched by a "blockquote" terminator rule.

    :param state: block parser state; tokens are pushed onto it
    :param startLine: index of the header row
    :param endLine: index one past the last line the table may use
    :param silent: if true, only validate the header and delimiter rows
    :return: ``True`` if a table was recognised, else ``False``
    """
    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    delimLine = startLine + 1

    if state.sCount[delimLine] < state.blkIndent:
        return False

    if state.is_code_block(delimLine):
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    delimiterChars = ("|", "-", ":")
    pos = state.bMarks[delimLine] + state.tShift[delimLine]
    delimEnd = state.eMarks[delimLine]

    # the delimiter row needs at least two characters
    if pos + 1 >= delimEnd:
        return False

    first_ch = state.src[pos]
    second_ch = state.src[pos + 1]

    if first_ch not in delimiterChars:
        return False

    if second_ch not in delimiterChars and not isStrSpace(second_ch):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == "-" and isStrSpace(second_ch):
        return False

    for ch in state.src[pos + 2 : delimEnd]:
        if ch not in delimiterChars and not isStrSpace(ch):
            return False

    # derive one alignment per column from the delimiter row
    delimCells = getLine(state, delimLine).split("|")
    lastCellIdx = len(delimCells) - 1
    aligns = []
    for idx, rawCell in enumerate(delimCells):
        cell = rawCell.strip()
        if not cell:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if idx in (0, lastCellIdx):
                continue
            return False

        if not headerLineRe.search(cell):
            return False

        if cell.endswith(":"):
            aligns.append("center" if cell.startswith(":") else "right")
        elif cell.startswith(":"):
            aligns.append("left")
        else:
            aligns.append("")

    headerText = getLine(state, startLine).strip()
    if "|" not in headerText:
        return False
    if state.is_code_block(startLine):
        return False

    # drop the empty cells produced by enclosing pipes
    headerCells = escapedSplit(headerText)
    if headerCells and headerCells[0] == "":
        del headerCells[0]
    if headerCells and headerCells[-1] == "":
        del headerCells[-1]

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(headerCells)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    # the end lines of the table / tbody maps are patched in once known
    tableLines = [startLine, 0]
    tbodyLines = None

    token = state.push("table_open", "table", 1)
    token.map = tableLines

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for cellText, align in zip(headerCells, aligns):
        token = state.push("th_open", "th", 1)
        if align:
            token.attrs = {"style": "text-align:" + align}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = cellText.strip()
        token.children = []

        state.push("th_close", "th", -1)

    state.push("tr_close", "tr", -1)
    state.push("thead_close", "thead", -1)

    firstBodyLine = startLine + 2
    nextLine = firstBodyLine
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        rowText = getLine(state, nextLine).strip()
        if not rowText:
            break
        if state.is_code_block(nextLine):
            break

        rowCells = escapedSplit(rowText)
        if rowCells and rowCells[0] == "":
            del rowCells[0]
        if rowCells and rowCells[-1] == "":
            del rowCells[-1]

        if nextLine == firstBodyLine:
            # open the body lazily, only once a first body row exists
            tbodyLines = [firstBodyLine, 0]
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        # every row gets exactly as many cells as the header: surplus cells
        # are ignored and missing ones are emitted empty
        for idx, align in enumerate(aligns):
            token = state.push("td_open", "td", 1)
            if align:
                token.attrs = {"style": "text-align:" + align}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            token.content = rowCells[idx].strip() if idx < len(rowCells) else ""
            token.children = []

            state.push("td_close", "td", -1)

        state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines:
        state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True
|
Reference in New Issue
Block a user