#!/usr/bin/env python
# *****************************************************************************
# Copyright (C) 2018-2023 Thomas Touhey
# This file is part of the textoutpc project, which is MIT-licensed.
# *****************************************************************************
"""Parser definition for textoutpc."""

from __future__ import annotations

from typing import NamedTuple, Sequence

from docutils.nodes import document as Document, Node, Text
from docutils.parsers import Parser as BaseParser

from .builtin import (
    AdminImageTag,
    AlignTag,
    CodeTag,
    ImageTag,
    InlineCodeTag,
    LabelTag,
    LinkTag,
    NoEvalTag,
    ProfileTag,
    ProgressTag,
    QuoteTag,
    RotTag,
    SpoilerTag,
    SubtitleTag,
    TargetTag,
    TextTag,
    TitleTag,
)
from .exceptions import TagValidationError
from .lexer import (
    CloseTagEntity,
    Entity,
    NewlineEntity,
    OpenTagEntity,
    SpecialEntity,
    TextEntity,
    iter_textout_entities,
)
from .tags import Tag

# Default mapping between tag names, as they are looked up by the state
# machine ("[name]" for bracketed tags, the raw character sequence for
# special entities such as "`"), and the class implementing each tag.
BUILTIN_TAGS: dict[str, type[Tag]] = {
    # TODO: Add the [calc] BBCode tag.
    # TODO: Add the [indent] BBCode tag.
    # TODO: Add the [list] and [li] BBCode tags.
    # TODO: Add the [table], [tr], [td] and [th] BBCode tags.
    # TODO: Add the [video] and [video tiny] BBCode tags.
    "`": InlineCodeTag,
    "[adimg]": AdminImageTag,
    "[arial]": TextTag,
    "[b]": TextTag,
    "[big]": TextTag,
    "[blue]": TextTag,
    "[brown]": TextTag,
    "[c]": TextTag,
    "[center]": AlignTag,
    "[code]": CodeTag,
    "[color]": TextTag,
    "[comic]": TextTag,
    "[courier]": TextTag,
    "[css]": TextTag,
    "[font]": TextTag,
    "[gray]": TextTag,
    "[green]": TextTag,
    "[grey]": TextTag,
    "[haettenschweiler]": TextTag,
    "[i]": TextTag,
    "[img]": ImageTag,
    "[justify]": AlignTag,
    "[label]": LabelTag,
    "[maroon]": TextTag,
    "[mono]": TextTag,
    "[monospace]": TextTag,
    "[noeval]": NoEvalTag,
    "[o]": TextTag,
    "[profile]": ProfileTag,
    "[progress]": ProgressTag,
    "[purple]": TextTag,
    "[quote]": QuoteTag,
    "[red]": TextTag,
    "[rot]": RotTag,
    "[rot13]": RotTag,
    "[s]": TextTag,  # Synonym for [strike].
    "[size]": TextTag,
    "[small]": TextTag,
    "[spoiler]": SpoilerTag,
    "[strike]": TextTag,
    "[subtitle]": SubtitleTag,
    "[tahoma]": TextTag,
    "[target]": TargetTag,
    "[title]": TitleTag,
    "[u]": TextTag,
    "[url]": LinkTag,
    "[yellow]": TextTag,
}


class StackElement(NamedTuple):
    """Element of the parsing stack."""

    name: str
    """Name of the tag."""

    tag: Tag
    """Instantiated tag."""

    is_raw: bool
    """Whether the tag is raw or not."""

    children: list[Node]
    """Children nodes which to add to the parent element."""


class StateMachine:
    """State machine for a "textout"-style language.

    Lexical entities are fed one at a time through :py:meth:`process`;
    once the input is exhausted, :py:meth:`close` must be called so that
    any tag still open gets closed and added to the document.

    :param document: The document to which to add elements.
    :param tags: The mapping between tag names and tag classes.
    """

    __slots__ = ("document", "stack", "tags", "text")

    document: Document
    """Document to which to add elements."""

    tags: dict[str, type[Tag]]
    """Tags mapping."""

    stack: list[StackElement]
    """Element stack.

    The innermost (most recently opened) tag is at index 0.
    """

    text: str
    """Text buffer.

    This is mostly used not to produce multiple Text elements.
    """

    def __init__(
        self,
        /,
        *,
        document: Document,
        tags: dict[str, type[Tag]],
    ) -> None:
        self.document = document
        self.tags = tags
        self.stack = []
        self.text = ""

    def flush_text(self, /) -> list[Node]:
        """Flush the text.

        The text buffer is emptied as a side effect.

        :return: A list with a single text node holding the buffered text,
            or an empty list if the buffer was empty.
        """
        text, self.text = self.text, ""
        return [Text(text)] if text else []

    def _append_to_parent(self, nodes: Sequence[Node], /) -> None:
        """Append nodes to the innermost open tag, or to the document.

        If the parent currently ends with a text node and the first node
        to add is a text node as well, both are merged into a single text
        node, in order not to produce consecutive text nodes.

        :param nodes: The nodes to append; the sequence is left untouched.
        """
        if not nodes:
            return

        parent: list[Node] | Document
        parent = self.stack[0].children if self.stack else self.document

        first = nodes[0]
        # NOTE: The length is tested explicitly rather than through
        # truthiness, since docutils nodes are always truthy, even when
        # they have no children.
        if (
            len(parent) > 0
            and isinstance(parent[-1], Text)
            and isinstance(first, Text)
        ):
            first = Text(str(parent.pop(-1)) + str(first))

        parent.append(first)
        parent.extend(nodes[1:])

    def open_tag(self, tag: Tag, /) -> None:
        """Open a new stack level.

        :param tag: The tag with which to open the tag.
        """
        # The text currently in the buffer belongs to the current top of
        # the stack (or to the document), so it must be added there before
        # the new element becomes the top of the stack.
        self._append_to_parent(self.flush_text())

        self.stack.insert(
            0,
            StackElement(
                name=tag.name,
                tag=tag,
                is_raw=tag.is_raw(),
                children=[],
            ),
        )

    def close_multiple(self, count: int, /) -> None:
        """Close multiple tags.

        :param count: Number of elements in the stack to close.
        :raises AssertionError: If the stack holds less than ``count``
            elements.
        """
        if len(self.stack) < count:  # pragma: no cover
            raise AssertionError(
                f"Could not close {count} contexts with a {len(self.stack)}-"
                + "deep stack.",
            )

        # The buffered text, if any, is the last child of the innermost
        # tag being closed.
        children = self.flush_text()

        # Close the tags from the innermost to the outermost one; the
        # nodes produced by a tag are appended to the children of the
        # next tag to close.
        for el in self.stack[:count]:
            children = list(el.tag.process(children=el.children + children))

        del self.stack[:count]

        # What remains goes to the new top of the stack, or to the document
        # if no tag is left open.
        self._append_to_parent(children)

    def _process_open_tag(self, entity: OpenTagEntity, /) -> None:
        """Process an opening tag, e.g. ``[b]``.

        The entity is added to the text buffer in its raw form if it
        occurs in a raw context, if the tag is unknown, or if the tag
        class rejects the provided name and value.

        :param entity: The opening tag entity.
        """
        if self.stack and self.stack[0].is_raw:
            # We are not allowed to open tags in a raw context.
            self.text += entity.raw
            return

        ent_name = f"[{entity.name}]"
        tag_cls = self.tags.get(ent_name)
        if tag_cls is None:
            self.text += entity.raw
            return

        try:
            tag = tag_cls(name=ent_name, value=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.raw
            return

        self.open_tag(tag)

    def _process_close_tag(self, entity: CloseTagEntity, /) -> None:
        """Process a closing tag, e.g. ``[/b]`` or ``[/]``.

        The entity is added to the text buffer in its raw form if it does
        not match any tag it is allowed to close.

        :param entity: The closing tag entity.
        """
        ent_name = f"[{entity.name}]"

        if self.stack and self.stack[0].is_raw:
            if ent_name in ("[]", self.stack[0].name):
                # We are indeed closing the current raw tag!
                self.close_multiple(1)
            else:
                # We are not closing the raw tag, and cannot close any
                # parent tag, so we actually just consider this as text.
                self.text += entity.raw
            return

        for depth, el in enumerate(self.stack, start=1):
            # In non-raw cases, the [/] tag means that we want to close
            # the first found tag.
            if ent_name in ("[]", el.name):
                self.close_multiple(depth)
                return

        # The closing tag doesn't correspond to an existing tag,
        # so we consider it as simple text.
        self.text += entity.raw

    def _process_special(self, entity: SpecialEntity, /) -> None:
        """Process a special entity, which either opens or closes a tag.

        The entity value is added to the text buffer if it can neither
        close an opened tag nor open a new one.

        :param entity: The special entity.
        """
        if self.stack and self.stack[0].is_raw:
            # In a raw context, only the current tag can be closed.
            if self.stack[0].name == entity.value:
                self.close_multiple(1)
            else:
                self.text += entity.value
            return

        # If the tag is opened, we want to close it.
        for depth, el in enumerate(self.stack, start=1):
            if entity.value == el.name:
                self.close_multiple(depth)
                return

        tag_cls = self.tags.get(entity.value)
        if tag_cls is None:
            self.text += entity.value
            return

        # Otherwise, we want to open the tag.
        try:
            tag = tag_cls(name=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.value
            return

        self.open_tag(tag)

    def process(self, entity: Entity, /) -> None:
        """Process the lexical entity.

        :param entity: The entity to process.
        :raises NotImplementedError: If the entity type is not supported.
        """
        if isinstance(entity, TextEntity):
            self.text += entity.content
        elif isinstance(entity, NewlineEntity):
            self.text += "\n"
        elif isinstance(entity, OpenTagEntity):
            self._process_open_tag(entity)
        elif isinstance(entity, CloseTagEntity):
            self._process_close_tag(entity)
        elif isinstance(entity, SpecialEntity):
            self._process_special(entity)
        else:  # pragma: no cover
            raise NotImplementedError(f"Unsupported element {entity!r}")

    def close(self, /) -> None:
        """Close the existing stack.

        Every tag still open is closed, and the remaining buffered text
        is added to the document.
        """
        self.close_multiple(len(self.stack))


class Parser(BaseParser):
    """Parser for Planète Casio "textout"-type BBCode.

    :param tags: The tags to use with the parser. If not provided,
        :py:data:`BUILTIN_TAGS` is used as is (not copied).
    """

    __slots__ = ("tags",)

    tags: dict[str, type[Tag]]
    """Tag classes, bound by name."""

    def __init__(self, /, *, tags: dict[str, type[Tag]] | None = None) -> None:
        self.tags = BUILTIN_TAGS if tags is None else tags

    def parse(self, inputstring: str, document: Document) -> None:
        """Parse the input string in BBCode to a document.

        :param inputstring: The input string to parse to obtain the document.
        :param document: The document to populate.
        """
        self.setup_parse(inputstring, document)

        state_machine = StateMachine(document=document, tags=self.tags)
        for entity in iter_textout_entities(self.inputstring):
            state_machine.process(entity)

        # Tags left open at the end of the input are closed implicitly.
        state_machine.close()
        self.finish_parse()