2
0
Fork 0
textout/textoutpc/parser.py

367 lines
9.9 KiB
Python

#!/usr/bin/env python
# *****************************************************************************
# Copyright (C) 2018-2023 Thomas Touhey <thomas@touhey.fr>
# This file is part of the textoutpc project, which is MIT-licensed.
# *****************************************************************************
"""Parser definition for textoutpc."""
from __future__ import annotations
from typing import NamedTuple, Sequence
from docutils.nodes import document as Document, Node, Text
from docutils.parsers import Parser as BaseParser
from .builtin import (
AdminImageTag,
AlignTag,
CodeTag,
ImageTag,
InlineCodeTag,
LabelTag,
LinkTag,
NoEvalTag,
ProfileTag,
ProgressTag,
QuoteTag,
RotTag,
SpoilerTag,
SubtitleTag,
TargetTag,
TextTag,
TitleTag,
)
from .exceptions import TagValidationError
from .lexer import (
CloseTagEntity,
Entity,
NewlineEntity,
OpenTagEntity,
SpecialEntity,
TextEntity,
iter_textout_entities,
)
from .tags import Tag
# Default mapping of tag names to the tag class handling them; this is the
# mapping used by ``Parser`` when no ``tags`` argument is provided.
#
# Keys for bracketed tags include the brackets, since ``StateMachine``
# looks open and close tags up as ``f"[{entity.name}]"``. Special entities
# are looked up by their value directly (presumably how the "`" key is
# reached -- to be confirmed against the lexer).
BUILTIN_TAGS = {
    # TODO: Add the [calc] BBCode tag.
    # TODO: Add the [indent] BBCode tag.
    # TODO: Add the [list] and [li] BBCode tags.
    # TODO: Add the [table], [tr], [td] and [th] BBCode tags.
    # TODO: Add the [video] and [video tiny] BBCode tags.
    "`": InlineCodeTag,
    "[adimg]": AdminImageTag,
    "[arial]": TextTag,
    "[b]": TextTag,
    "[big]": TextTag,
    "[blue]": TextTag,
    "[brown]": TextTag,
    "[c]": TextTag,
    "[center]": AlignTag,
    "[code]": CodeTag,
    "[color]": TextTag,
    "[comic]": TextTag,
    "[courier]": TextTag,
    "[css]": TextTag,
    "[font]": TextTag,
    "[gray]": TextTag,
    "[green]": TextTag,
    "[grey]": TextTag,
    "[haettenschweiler]": TextTag,
    "[i]": TextTag,
    "[img]": ImageTag,
    "[justify]": AlignTag,
    "[label]": LabelTag,
    "[maroon]": TextTag,
    "[mono]": TextTag,
    "[monospace]": TextTag,
    "[noeval]": NoEvalTag,
    "[o]": TextTag,
    "[profile]": ProfileTag,
    "[progress]": ProgressTag,
    "[purple]": TextTag,
    "[quote]": QuoteTag,
    "[red]": TextTag,
    "[rot]": RotTag,
    "[rot13]": RotTag,
    "[s]": TextTag,  # Synonym for [strike].
    "[size]": TextTag,
    "[small]": TextTag,
    "[spoiler]": SpoilerTag,
    "[strike]": TextTag,
    "[subtitle]": SubtitleTag,
    "[tahoma]": TextTag,
    "[target]": TargetTag,
    "[title]": TitleTag,
    "[u]": TextTag,
    "[url]": LinkTag,
    "[yellow]": TextTag,
}
class StackElement(NamedTuple):
    """One level of the parsing stack, i.e. one currently opened tag."""

    name: str
    """Name under which the tag was opened, used to match closing tags."""

    tag: Tag
    """Tag instance that processes the children once the level is closed."""

    is_raw: bool
    """Whether the tag is raw, i.e. no other tag may be opened inside it."""

    children: list[Node]
    """Nodes gathered so far at this level, handed to the tag on closing."""
class StateMachine:
    """State machine for a "textout"-style language.

    Lexical entities are fed one at a time to :py:meth:`process`, which
    buffers text and opens or closes stack levels as tags are encountered.
    Once all entities have been processed, :py:meth:`close` must be called
    to close the remaining levels and flush the pending text.
    """

    __slots__ = ("document", "stack", "tags", "text")

    document: Document
    """Document to which to add elements."""

    tags: dict[str, type[Tag]]
    """Tags mapping."""

    stack: list[StackElement]
    """Element stack, with the most recently opened tag at index 0."""

    text: str
    """Text buffer.

    This is mostly used not to produce multiple Text elements.
    """

    def __init__(
        self,
        /,
        *,
        document: Document,
        tags: dict[str, type[Tag]],
    ) -> None:
        self.document = document
        self.tags = tags
        self.stack = []
        self.text = ""

    def flush_text(self, /) -> list[Node]:
        """Flush the text.

        The text buffer is emptied in the process.

        :return: A list with a single text node if the buffer was not
            empty, an empty list otherwise.
        """
        text, self.text = self.text, ""
        if not text:
            return []

        return [Text(text)]

    def _append_nodes(self, nodes: list[Node], /) -> None:
        """Append nodes to the innermost opened level, or to the document.

        If the container ends with a text node and the first node to add
        is a text node as well, both are merged into a single text node,
        in order not to produce consecutive text nodes.

        :param nodes: The nodes to add; the list itself is left untouched.
        """
        if not nodes:
            return

        container: list[Node] | Document = (
            self.stack[0].children if self.stack else self.document
        )
        if (
            len(container) > 0
            and isinstance(container[-1], Text)
            and isinstance(nodes[0], Text)
        ):
            merged = Text(str(container.pop(-1)) + str(nodes[0]))
            nodes = [merged, *nodes[1:]]

        container.extend(nodes)

    def open_tag(self, tag: Tag, /) -> None:
        """Open a new stack level.

        :param tag: The tag with which to open the new level.
        """
        # The text currently in the buffer belongs to the level which is
        # current *before* the new one gets inserted.
        self._append_nodes(self.flush_text())

        self.stack.insert(
            0,
            StackElement(
                name=tag.name,
                tag=tag,
                children=[],
                is_raw=tag.is_raw(),
            ),
        )

    def close_multiple(self, count: int, /) -> None:
        """Close multiple tags.

        :param count: Number of elements in the stack to close.
        :raises AssertionError: If the stack holds less than ``count``
            elements.
        """
        if len(self.stack) < count:  # pragma: no cover
            raise AssertionError(
                f"Could not close {count} contexts with a {len(self.stack)}-"
                + "deep stack.",
            )

        # The buffered text, if any, belongs to the innermost level.
        children = self.flush_text()

        # Close the levels from the innermost one outwards: what a tag
        # produces becomes the last children of the level enclosing it.
        for el in self.stack[:count]:
            children = list(
                el.tag.process(children=el.children + children),
            )

        del self.stack[:count]
        self._append_nodes(children)

    def process(self, entity: Entity, /) -> None:
        """Process the lexical entity.

        :param entity: The entity to process.
        :raises NotImplementedError: If the entity type is not supported.
        """
        if isinstance(entity, TextEntity):
            self.text += entity.content
        elif isinstance(entity, NewlineEntity):
            self.text += "\n"
        elif isinstance(entity, OpenTagEntity):
            self._process_open_tag(entity)
        elif isinstance(entity, CloseTagEntity):
            self._process_close_tag(entity)
        elif isinstance(entity, SpecialEntity):
            self._process_special(entity)
        else:
            raise NotImplementedError(  # pragma: no cover
                f"Unsupported element {entity!r}",
            )

    def _process_open_tag(self, entity: OpenTagEntity, /) -> None:
        """Open the tag matching the entity, or keep the entity as text."""
        if self.stack and self.stack[0].is_raw:
            # We are not allowed to open tags in a raw context.
            self.text += entity.raw
            return

        ent_name = f"[{entity.name}]"
        tag_cls = self.tags.get(ent_name)
        if tag_cls is None:
            # Unknown tag, kept as is.
            self.text += entity.raw
            return

        try:
            tag = tag_cls(name=ent_name, value=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.raw
            return

        self.open_tag(tag)

    def _process_close_tag(self, entity: CloseTagEntity, /) -> None:
        """Close the levels up to the matching tag, or keep it as text."""
        ent_name = f"[{entity.name}]"

        if self.stack and self.stack[0].is_raw:
            if ent_name in ("[]", self.stack[0].name):
                # We are indeed closing the current raw tag!
                self.close_multiple(1)
            else:
                # We are not closing the raw tag, and cannot close any
                # parent tag, so we actually just consider this as text.
                self.text += entity.raw

            return

        for i, el in enumerate(self.stack):
            # In non-raw cases, the [/] tag means that we want to close
            # the first found tag.
            if ent_name in ("[]", el.name):
                self.close_multiple(1 + i)
                return

        # The closing tag doesn't correspond to an existing tag,
        # so we consider it as simple text.
        self.text += entity.raw

    def _process_special(self, entity: SpecialEntity, /) -> None:
        """Toggle the tag bound to a special entity, or keep it as text."""
        if self.stack and self.stack[0].is_raw:
            # In a raw context, the only thing a special entity can do is
            # close the current tag.
            if self.stack[0].name == entity.value:
                self.close_multiple(1)
            else:
                self.text += entity.value

            return

        # If the tag is opened, we want to close it.
        for i, el in enumerate(self.stack):
            if entity.value == el.name:
                self.close_multiple(1 + i)
                return

        tag_cls = self.tags.get(entity.value)
        if tag_cls is None:
            self.text += entity.value
            return

        # Otherwise, we want to open the tag.
        try:
            tag = tag_cls(name=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.value
            return

        self.open_tag(tag)

    def close(self, /) -> None:
        """Close the existing stack."""
        self.close_multiple(len(self.stack))
class Parser(BaseParser):
    """docutils parser for Planète Casio "textout"-type BBCode.

    :param tags: The tag classes to use, bound by name; if not provided,
        the builtin tags are used.
    """

    __slots__ = ("tags",)

    tags: dict[str, type[Tag]]
    """Mapping between tag names and the classes implementing them."""

    def __init__(self, /, *, tags: dict[str, type[Tag]] | None = None) -> None:
        self.tags = BUILTIN_TAGS if tags is None else tags

    def parse(self, inputstring: str, document: Document) -> None:
        """Populate a document from BBCode.

        :param inputstring: The BBCode source to parse.
        :param document: The document to which the produced nodes are added.
        """
        self.setup_parse(inputstring, document)

        entities = iter_textout_entities(self.inputstring)
        machine = StateMachine(document=document, tags=self.tags)
        for entity in entities:
            machine.process(entity)

        # Close the tags left opened, and flush the remaining text.
        machine.close()
        self.finish_parse()