321 lines
8.7 KiB
Python
321 lines
8.7 KiB
Python
#!/usr/bin/env python
|
|
# *****************************************************************************
|
|
# Copyright (C) 2018-2023 Thomas Touhey <thomas@touhey.fr>
|
|
# This file is part of the textoutpc project, which is MIT-licensed.
|
|
# *****************************************************************************
|
|
"""Parser definition for textoutpc."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import NamedTuple, Sequence
|
|
|
|
from docutils.nodes import document as Document, Node, Text
|
|
from docutils.parsers import Parser
|
|
|
|
from .builtin import (
|
|
AdminImageTag,
|
|
AlignTag,
|
|
CodeTag,
|
|
ImageTag,
|
|
InlineCodeTag,
|
|
LabelTag,
|
|
LinkTag,
|
|
NoEvalTag,
|
|
ProfileTag,
|
|
ProgressTag,
|
|
RotTag,
|
|
SpoilerTag,
|
|
TargetTag,
|
|
TextTag,
|
|
)
|
|
from .exceptions import TagValidationError
|
|
from .lexer import (
|
|
CloseTagEntity,
|
|
Entity,
|
|
NewlineEntity,
|
|
OpenTagEntity,
|
|
TextEntity,
|
|
iter_textout_entities,
|
|
)
|
|
from .tags import Tag
|
|
|
|
|
|
# Default tag classes, keyed by the name under which they are looked up:
# the bracketed tag name for BBCode tags (e.g. "[b]"), or the delimiter
# itself for the inline code tag.
#
# Tags that are not available yet:
#
# TODO: Add the [calc] BBCode tag.
# TODO: Add the [quote] BBCode tag.
# TODO: Add the [indent] BBCode tag.
# TODO: Add the [list] and [li] BBCode tags.
# TODO: Add the [table], [tr], [td] and [th] BBCode tags.
# TODO: Add the [video] and [video tiny] BBCode tags.
BUILTIN_TAGS: dict[str, type[Tag]] = {
    "`": InlineCodeTag,
    "[adimg]": AdminImageTag,
    "[arial]": TextTag,
    "[b]": TextTag,
    "[big]": TextTag,
    "[blue]": TextTag,
    "[brown]": TextTag,
    "[c]": TextTag,
    "[center]": AlignTag,
    "[code]": CodeTag,
    "[color]": TextTag,
    "[comic]": TextTag,
    "[courier]": TextTag,
    "[css]": TextTag,
    "[font]": TextTag,
    "[gray]": TextTag,
    "[green]": TextTag,
    "[grey]": TextTag,
    "[haettenschweiler]": TextTag,
    "[i]": TextTag,
    "[img]": ImageTag,
    "[justify]": AlignTag,
    "[label]": LabelTag,
    "[maroon]": TextTag,
    "[mono]": TextTag,
    "[monospace]": TextTag,
    "[noeval]": NoEvalTag,
    "[o]": TextTag,
    "[profile]": ProfileTag,
    "[progress]": ProgressTag,
    "[purple]": TextTag,
    "[red]": TextTag,
    "[rot]": RotTag,
    "[rot13]": RotTag,
    # [s] is a synonym for [strike].
    "[s]": TextTag,
    "[size]": TextTag,
    "[small]": TextTag,
    "[spoiler]": SpoilerTag,
    "[strike]": TextTag,
    "[tahoma]": TextTag,
    "[target]": TargetTag,
    "[u]": TextTag,
    "[url]": LinkTag,
    "[yellow]": TextTag,
}
|
|
|
|
|
|
class StackElement(NamedTuple):
    """Entry of the parsing stack, describing one currently opened tag.

    Fields, in positional order:

    * ``name``: name of the tag.
    * ``tag``: instantiated tag.
    * ``is_raw``: whether the tag is raw or not.
    * ``children``: nodes gathered for the tag so far, handed over to the
      tag when it gets closed.
    """

    name: str  # Name of the tag.
    tag: Tag  # Instantiated tag.
    is_raw: bool  # Whether the tag is raw or not.
    children: list[Node]  # Nodes gathered for the tag so far.
|
|
|
|
|
|
class TextoutStateMachine:
    """State machine for a "textout"-style language.

    Lexical entities are fed one at a time through :py:meth:`process`,
    which buffers text and maintains a stack of opened tags. Once all
    entities have been fed, :py:meth:`close` flushes the buffered text and
    closes every tag left open.

    :param document: The document to which to add elements.
    :param tags: The tag classes, by bracketed name (e.g. ``[b]``).
    """

    __slots__ = ("document", "stack", "tags", "text")

    document: Document
    """Document to which to add elements."""

    tags: dict[str, type[Tag]]
    """Tags mapping."""

    stack: list[StackElement]
    """Element stack, with the innermost opened tag at index 0."""

    text: str
    """Text buffer.

    This is mostly used not to produce multiple Text elements.
    """

    def __init__(
        self,
        /,
        *,
        document: Document,
        tags: dict[str, type[Tag]],
    ) -> None:
        self.document = document
        self.tags = tags
        self.stack = []
        self.text = ""

    def flush_text(self, /) -> list[Node]:
        """Flush the text.

        The text buffer is emptied in the process.

        :return: A list with a single Text node holding the buffered text,
            or an empty list if the buffer was empty.
        """
        text, self.text = self.text, ""
        return [Text(text)] if text else []

    def _append_to_parent(self, nodes: list[Node], /) -> None:
        """Add nodes to the innermost opened tag, or to the document.

        If the parent ends with a Text node and the first node to add is
        a Text node as well, both are merged into a single Text node.

        :param nodes: The nodes to add. The list may be modified in place.
        """
        parent: list[Node] | Document
        parent = self.stack[0].children if self.stack else self.document

        # Lengths are checked explicitly: the parent may be a docutils
        # document, whose truthiness does not reflect whether it has
        # children.
        if (
            len(nodes) > 0
            and len(parent) > 0
            and isinstance(parent[-1], Text)
            and isinstance(nodes[0], Text)
        ):
            # We want to optimize the texts.
            nodes[0] = Text(str(parent.pop(-1)) + str(nodes[0]))

        parent.extend(nodes)

    def close_multiple(self, count: int, /) -> None:
        """Close multiple tags.

        Tags are closed from the innermost one outwards. The nodes
        produced by a closed tag are appended to the children of the next
        tag being closed, and the final result is added to the first tag
        left open, or to the document if the stack ends up empty.

        :param count: Number of elements in the stack to close.
        :raises AssertionError: If the stack holds less than ``count``
            elements.
        """
        if len(self.stack) < count:  # pragma: no cover
            raise AssertionError(
                f"Could not close {count} contexts with a {len(self.stack)}-"
                + "deep stack.",
            )

        # We need to add the text element first if we have some text
        # in the buffer.
        children = self.flush_text()

        # We now need to close every one of the tags.
        for el in self.stack[:count]:
            children = list(
                el.tag.process(children=el.children + children),
            )

        del self.stack[:count]
        self._append_to_parent(children)

    def _open_tag(self, entity: OpenTagEntity, /) -> None:
        """Process an opening tag.

        The entity is kept as plain text if a raw tag is currently
        opened, if the tag is unknown, or if the tag refuses its name and
        value. Otherwise, the tag is pushed at the top of the stack.

        :param entity: The opening tag entity to process.
        """
        if self.stack and self.stack[0].is_raw:
            # We are not allowed to open tags in a raw context.
            self.text += entity.raw
            return

        ent_name = f"[{entity.name}]"
        tag_cls = self.tags.get(ent_name)
        if tag_cls is None:
            self.text += entity.raw
            return

        try:
            tag = tag_cls(name=ent_name, value=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.raw
            return

        # Add the text currently in the buffer to the top of the stack
        # before inserting the new element.
        self._append_to_parent(self.flush_text())

        # Insert the element.
        self.stack.insert(
            0,
            StackElement(
                name=ent_name,
                tag=tag,
                children=[],
                is_raw=tag.is_raw(),
            ),
        )

    def _close_tag(self, entity: CloseTagEntity, /) -> None:
        """Process a closing tag.

        In a raw context, only the closing tag matching the current raw
        tag is honoured. Otherwise, the innermost opened tag with the
        same name is closed, along with every tag opened within it.
        A closing tag matching nothing is kept as plain text.

        :param entity: The closing tag entity to process.
        """
        ent_name = f"[{entity.name}]"
        if self.stack and self.stack[0].is_raw:
            if self.stack[0].name == ent_name:
                # We are indeed closing the current raw tag!
                self.close_multiple(1)
            else:
                # We are not closing the raw tag, and cannot close any
                # parent tag, so we actually just consider this as text.
                self.text += entity.raw

            return

        for depth, el in enumerate(self.stack, start=1):
            # In non-raw cases, the [/] tag means that we want to close
            # the first found tag.
            if ent_name in ("[]", el.name):
                self.close_multiple(depth)
                return

        # The closing tag doesn't correspond to an existing tag,
        # so we consider it as simple text.
        self.text += entity.raw

    def process(self, entity: Entity, /) -> None:
        """Process the lexical entity.

        Text and newlines are added to the text buffer, while opening
        and closing tags update the stack.

        :param entity: The entity to process.
        :raises NotImplementedError: If the entity type is not supported.
        """
        if isinstance(entity, TextEntity):
            self.text += entity.content
        elif isinstance(entity, NewlineEntity):
            self.text += "\n"
        elif isinstance(entity, OpenTagEntity):
            self._open_tag(entity)
        elif isinstance(entity, CloseTagEntity):
            self._close_tag(entity)
        else:
            raise NotImplementedError(  # pragma: no cover
                f"Unsupported element {entity!r}",
            )

    def close(self, /) -> None:
        """Close the existing stack."""
        self.close_multiple(len(self.stack))
|
|
|
|
|
|
class TextoutParser(Parser):
    """Parser for Planète Casio "textout"-type BBCode.

    :param tags: The tags to use with the parser. If not provided, the
        builtin tags are used.
    """

    __slots__ = ("tags",)

    tags: dict[str, type[Tag]]
    """Tag classes, bound by name."""

    def __init__(self, /, *, tags: dict[str, type[Tag]] | None = None) -> None:
        self.tags = BUILTIN_TAGS if tags is None else tags

    def parse(self, inputstring: str, document: Document) -> None:
        """Parse the input string in BBCode to a document.

        The input is split into lexical entities, which are fed in order
        to a state machine populating the document.

        :param inputstring: The input string to parse to obtain the document.
        :param document: The document to populate.
        """
        self.setup_parse(inputstring, document)
        self.lexer = iter_textout_entities(self.inputstring)

        machine = TextoutStateMachine(document=document, tags=self.tags)
        for entity in self.lexer:
            machine.process(entity)

        # Flush the remaining text and close the tags left open.
        machine.close()

        self.finish_parse()
|