Spaces:

GGSheng
/

action

Running

App Files Files Community

action / .venv /lib /python3.14 /site-packages /markdown_it /parser_inline.py

GGSheng

feat: deploy Gemma 4 to hf space

020c337 verified about 22 hours ago

raw

history blame contribute delete

5.02 kB

	"""Tokenizes paragraph content."""

	from __future__ import annotations

	from collections.abc import Callable
	from typing import TYPE_CHECKING

	from . import rules_inline
	from .ruler import Ruler
	from .rules_inline.state_inline import StateInline
	from .token import Token
	from .utils import EnvType

	if TYPE_CHECKING:
	from markdown_it import MarkdownIt


	# Parser rules
	#
	# Type of a first-pass inline rule. `ParserInline` calls it as
	# `rule(state, silent)` and treats a truthy result as "this rule matched":
	#     (state: StateInline, silent: bool) -> bool
	RuleFuncInlineType = Callable[[StateInline, bool], bool]
	"""(state: StateInline, silent: bool) -> matched: bool)

	`silent` disables token generation, useful for lookahead.
	"""
	# Default first-pass rules as (name, function) pairs.
	# `ParserInline.__init__` pushes them onto its `ruler` in exactly this list
	# order, so reordering entries changes the registration order.
	_rules: list[tuple[str, RuleFuncInlineType]] = [
	("text", rules_inline.text),
	("linkify", rules_inline.linkify),
	("newline", rules_inline.newline),
	("escape", rules_inline.escape),
	# NOTE: registered under the plural name "backticks" although the function
	# is `rules_inline.backtick`.
	("backticks", rules_inline.backtick),
	# strikethrough/emphasis expose a `tokenize` entry here and a matching
	# `postProcess` entry in the second rule table.
	("strikethrough", rules_inline.strikethrough.tokenize),
	("emphasis", rules_inline.emphasis.tokenize),
	("link", rules_inline.link),
	("image", rules_inline.image),
	("autolink", rules_inline.autolink),
	("html_inline", rules_inline.html_inline),
	("entity", rules_inline.entity),
	]

	# NOTE: this second ruleset (`_rules2`, pushed onto `ParserInline.ruler2`) was
	# created specifically for emphasis/strikethrough post-processing and may be
	# changed in the future.
	#
	# Don't use this for anything except pairs (plugins working with `balance_pairs`).
	#
	# Type of a post-processing rule. `ParserInline.parse` calls it as
	# `rule(state)` and ignores whatever it returns.
	RuleFuncInline2Type = Callable[[StateInline], None]
	# Default post-processing rules as (name, function) pairs, pushed onto
	# `ruler2` in exactly this list order.
	_rules2: list[tuple[str, RuleFuncInline2Type]] = [
	# NOTE: the rule registered as "balance_pairs" is `rules_inline.link_pairs`.
	("balance_pairs", rules_inline.link_pairs),
	("strikethrough", rules_inline.strikethrough.postProcess),
	("emphasis", rules_inline.emphasis.postProcess),
	# rules for pairs separate '**' into its own text tokens, which may be left unused,
	# rule below merges unused segments back with the rest of the text
	("fragments_join", rules_inline.fragments_join),
	]


	class ParserInline:
	    """Inline-level parser: turns paragraph text into inline tokens.

	    Two rulers are held:

	    - ``ruler``: first-pass rules, each called as ``rule(state, silent)``;
	      a truthy result means the rule matched.
	    - ``ruler2``: post-processing rules, each called as ``rule(state)`` once
	      tokenization of the input has finished.
	    """

	    def __init__(self) -> None:
	        """Create both rulers and register the default rules in table order."""
	        self.ruler = Ruler[RuleFuncInlineType]()
	        for name, rule in _rules:
	            self.ruler.push(name, rule)
	        # Second ruler used for post-processing (e.g. in emphasis-like rules)
	        self.ruler2 = Ruler[RuleFuncInline2Type]()
	        for name, rule2 in _rules2:
	            self.ruler2.push(name, rule2)

	    def skipToken(self, state: StateInline) -> None:
	        """Advance ``state.pos`` past a single token without emitting tokens.

	        Every rule is tried in validation mode (``silent=True``) until one
	        reports a match. If the nesting limit has already been reached, no
	        rule is tried and the position jumps to ``state.posMax`` instead.
	        Whenever no rule matched, the position is then moved forward by one.

	        The final position is memoized in ``state.cache`` under the starting
	        position, and a cache hit short-circuits all of the above.

	        Nothing is returned; the outcome is observable through ``state.pos``.
	        """
	        ok = False
	        pos = state.pos
	        rules = self.ruler.getRules("")
	        maxNesting = state.md.options["maxNesting"]
	        cache = state.cache

	        if pos in cache:
	            state.pos = cache[pos]
	            return

	        if state.level < maxNesting:
	            for rule in rules:
	                # Increment state.level and decrement it later to limit recursion.
	                # It's harmless to do here, because no tokens are created.
	                # But ideally, we'd need a separate private state variable
	                # for this purpose.
	                state.level += 1
	                ok = rule(state, True)
	                state.level -= 1
	                if ok:
	                    break
	        else:
	            # Too much nesting, just skip until the end of the paragraph.
	            #
	            # NOTE: this will cause links to behave incorrectly in the following
	            # case, when an amount of `[` is exactly equal to `maxNesting + 1`:
	            #
	            # [[[[[[[[[[[[[[[[[[[[[foo]()
	            #
	            # TODO: remove this workaround when CM standard will allow nested links
	            # (we can replace it by preventing links from being parsed in
	            # validation mode)
	            #
	            state.pos = state.posMax

	        if not ok:
	            state.pos += 1
	        cache[pos] = state.pos

	    def tokenize(self, state: StateInline) -> None:
	        """Generate tokens for input range.

	        Walks ``state.pos`` up to ``state.posMax``. At each position the rules
	        are tried in order (unless the nesting limit is reached); when none
	        matches, the current character is appended to ``state.pending`` and
	        the position advances by one. Any text still pending at the end is
	        flushed through ``state.pushPending()``.
	        """
	        rules = self.ruler.getRules("")
	        end = state.posMax
	        maxNesting = state.md.options["maxNesting"]

	        while state.pos < end:
	            # Try all possible rules.
	            # On success, rule should:
	            #
	            # - update `state.pos`
	            # - update `state.tokens`
	            # - return true

	            # Reset on every pass. If a previous pass matched and `state.level`
	            # has since reached `maxNesting`, the rule loop below is skipped;
	            # a stale truthy `ok` would then `continue` forever without
	            # advancing `state.pos`.
	            ok = False

	            if state.level < maxNesting:
	                for rule in rules:
	                    ok = rule(state, False)
	                    if ok:
	                        break

	            if ok:
	                if state.pos >= end:
	                    break
	                continue

	            # No rule consumed this position: treat the character as plain text.
	            state.pending += state.src[state.pos]
	            state.pos += 1

	        if state.pending:
	            state.pushPending()

	    def parse(
	        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
	    ) -> list[Token]:
	        """Process input string and push inline tokens into `tokens`.

	        A fresh ``StateInline`` is built from the arguments, tokenized, and then
	        handed to every post-processing rule of ``ruler2`` in turn.

	        :returns: ``state.tokens`` of that state object.
	        """
	        state = StateInline(src, md, env, tokens)
	        self.tokenize(state)
	        rules2 = self.ruler2.getRules("")
	        for rule in rules2:
	            rule(state)
	        return state.tokens