# textout/textoutpc/parser.py

#!/usr/bin/env python
# *****************************************************************************
# Copyright (C) 2018-2023 Thomas Touhey <thomas@touhey.fr>
# This file is part of the textoutpc project, which is MIT-licensed.
# *****************************************************************************
"""Parser definition for textoutpc."""

from __future__ import annotations

from typing import NamedTuple, Sequence

from docutils.nodes import document as Document, Node, Text
from docutils.parsers import Parser

from .builtin import (
    AdminImageTag,
    AlignTag,
    CodeTag,
    ImageTag,
    InlineCodeTag,
    LabelTag,
    LinkTag,
    NoEvalTag,
    ProfileTag,
    ProgressTag,
    RotTag,
    SpoilerTag,
    TargetTag,
    TextTag,
)
from .exceptions import TagValidationError
from .lexer import (
    CloseTagEntity,
    Entity,
    NewlineEntity,
    OpenTagEntity,
    TextEntity,
    iter_textout_entities,
)
from .tags import Tag


# Default mapping between tag names and the tag classes implementing them.
#
# TextoutParser falls back on this mapping when it is not given an explicit
# one. Bracketed keys use the "[name]" form under which TextoutStateMachine
# looks up the class to instantiate when it meets an opening tag.
BUILTIN_TAGS = {
    # TODO: Add the [calc] BBCode tag.
    # TODO: Add the [quote] BBCode tag.
    # TODO: Add the [indent] BBCode tag.
    # TODO: Add the [list] and [li] BBCode tags.
    # TODO: Add the [table], [tr], [td] and [th] BBCode tags.
    # TODO: Add the [video] and [video tiny] BBCode tags.
    # NOTE(review): this is the only key that is not bracketed, while the
    # state machine in this module only looks up "[name]" keys; confirm
    # where (or whether) this entry actually gets matched.
    "`": InlineCodeTag,
    "[adimg]": AdminImageTag,
    "[arial]": TextTag,
    "[b]": TextTag,
    "[big]": TextTag,
    "[blue]": TextTag,
    "[brown]": TextTag,
    "[c]": TextTag,
    "[center]": AlignTag,
    "[code]": CodeTag,
    "[color]": TextTag,
    "[comic]": TextTag,
    "[courier]": TextTag,
    "[css]": TextTag,
    "[font]": TextTag,
    "[gray]": TextTag,
    "[green]": TextTag,
    "[grey]": TextTag,
    "[haettenschweiler]": TextTag,
    "[i]": TextTag,
    "[img]": ImageTag,
    "[justify]": AlignTag,
    "[label]": LabelTag,
    "[maroon]": TextTag,
    "[mono]": TextTag,
    "[monospace]": TextTag,
    "[noeval]": NoEvalTag,
    "[o]": TextTag,
    "[profile]": ProfileTag,
    "[progress]": ProgressTag,
    "[purple]": TextTag,
    "[red]": TextTag,
    # Both names below are bound to the same tag class.
    "[rot]": RotTag,
    "[rot13]": RotTag,
    "[s]": TextTag,  # Synonym for [strike].
    "[size]": TextTag,
    "[small]": TextTag,
    "[spoiler]": SpoilerTag,
    "[strike]": TextTag,
    "[tahoma]": TextTag,
    "[target]": TargetTag,
    "[u]": TextTag,
    "[url]": LinkTag,
    "[yellow]": TextTag,
}


class StackElement(NamedTuple):
    """Entry of the parsing stack, describing one tag that is still open."""

    name: str
    """Bracketed tag name (e.g. ``[b]``), compared against closing tags."""

    tag: Tag
    """Tag instance created when the opening tag was processed."""

    is_raw: bool
    """Whether the tag is raw, i.e. no other tag can be opened within it."""

    children: list[Node]
    """Nodes gathered within the tag so far.

    They are handed over to the tag for processing once it gets closed.
    """


class TextoutStateMachine:
    """Stack-based builder turning "textout" lexical entities into nodes.

    Entities are fed one at a time through :py:meth:`process`; once the
    input is exhausted, :py:meth:`close` closes every tag left open.
    The innermost open tag always sits at index 0 of the stack.
    """

    __slots__ = ("document", "stack", "tags", "text")

    document: Document
    """Document receiving the nodes produced outside of any open tag."""

    tags: dict[str, type[Tag]]
    """Tag classes, indexed by bracketed name (e.g. ``[b]``)."""

    stack: list[StackElement]
    """Currently open tags, innermost first."""

    text: str
    """Pending text.

    Text is accumulated here so that consecutive pieces of text end up
    in one Text node rather than several.
    """

    def __init__(
        self,
        /,
        *,
        document: Document,
        tags: dict[str, type[Tag]],
    ) -> None:
        self.text = ""
        self.stack = []
        self.tags = tags
        self.document = document

    def _current_children(self, /) -> Sequence[Node] | Document:
        """Get the container to which new nodes must currently be added.

        :return: The children of the innermost open tag if there is one,
            the document otherwise.
        """
        return self.stack[0].children if self.stack else self.document

    def flush_text(self, /) -> list[Node]:
        """Empty the text buffer and wrap its content into a node list.

        :return: A list holding a single Text node, or an empty list if
            the buffer held no text.
        """
        pending = self.text
        self.text = ""
        return [Text(pending)] if pending else []

    def close_multiple(self, count: int, /) -> None:
        """Close the innermost tags, and attach the result to what remains.

        :param count: Number of elements in the stack to close.
        :raises AssertionError: If the stack holds less than ``count``
            elements.
        """
        depth = len(self.stack)
        if count > depth:  # pragma: no cover
            raise AssertionError(
                f"Could not close {count} contexts with a {depth}-"
                + "deep stack.",
            )

        # Any buffered text belongs to the innermost tag, so it is the
        # starting point of the nodes bubbling up.
        nodes = self.flush_text()

        # Going from the innermost tag outwards, every tag processes its
        # own children followed by what the tag within it has produced.
        for element in self.stack[:count]:
            nodes = list(
                element.tag.process(children=element.children + nodes),
            )

        del self.stack[:count]

        # The emptiness check goes through len(), since the target may be
        # the document rather than a list.
        target = self._current_children()
        if len(target) > 0 and nodes:
            last, first = target[-1], nodes[0]
            if isinstance(last, Text) and isinstance(first, Text):
                # Adjacent texts are merged into a single node.
                target.pop(-1)
                nodes[0] = Text(str(last) + str(first))

        target.extend(nodes)

    def _open_tag(self, entity: OpenTagEntity, /) -> None:
        """Process an opening tag.

        The tag is kept as plain text if the current context is raw, if
        the tag is unknown, or if the tag refuses its name or value.

        :param entity: The opening tag to process.
        """
        if self.stack and self.stack[0].is_raw:
            # No tag can be opened in a raw context.
            self.text += entity.raw
            return

        key = f"[{entity.name}]"
        tag_cls = self.tags.get(key)
        if tag_cls is None:
            self.text += entity.raw
            return

        try:
            tag = tag_cls(name=key, value=entity.value)
        except TagValidationError:
            # TODO: Add a warning.
            self.text += entity.raw
            return

        # The text read before the tag belongs to the current context,
        # and must be stored before the new context is pushed.
        pending = self.flush_text()
        if pending:
            target = self._current_children()
            if len(target) > 0 and isinstance(target[-1], Text):
                target[-1] = Text(str(target[-1]) + str(pending[0]))
            else:
                target.extend(pending)

        self.stack.insert(
            0,
            StackElement(
                name=key,
                tag=tag,
                is_raw=tag.is_raw(),
                children=[],
            ),
        )

    def _close_tag(self, entity: CloseTagEntity, /) -> None:
        """Process a closing tag.

        The tag is kept as plain text if it does not close any tag it is
        allowed to close.

        :param entity: The closing tag to process.
        """
        key = f"[{entity.name}]"

        if self.stack and self.stack[0].is_raw:
            # In a raw context, only the raw tag itself can be closed;
            # anything else is part of the raw content.
            if self.stack[0].name == key:
                self.close_multiple(1)
            else:
                self.text += entity.raw

            return

        # In non-raw contexts, [/] closes the innermost tag, while a named
        # closing tag closes everything up to the first tag bearing
        # that name.
        depth = next(
            (
                index
                for index, element in enumerate(self.stack, start=1)
                if key in ("[]", element.name)
            ),
            None,
        )
        if depth is None:
            # No open tag matches, the closing tag is simple text.
            self.text += entity.raw
        else:
            self.close_multiple(depth)

    def process(self, entity: Entity, /) -> None:
        """Update the state machine with a lexical entity.

        :param entity: The entity to process.
        :raises NotImplementedError: If the entity type is not supported.
        """
        if isinstance(entity, TextEntity):
            self.text += entity.content
        elif isinstance(entity, NewlineEntity):
            self.text += "\n"
        elif isinstance(entity, OpenTagEntity):
            self._open_tag(entity)
        elif isinstance(entity, CloseTagEntity):
            self._close_tag(entity)
        else:
            raise NotImplementedError(  # pragma: no cover
                f"Unsupported element {entity!r}",
            )

    def close(self, /) -> None:
        """Close every tag that is still open, and flush pending text."""
        self.close_multiple(len(self.stack))


class TextoutParser(Parser):
    """docutils parser for Planète Casio "textout"-type BBCode.

    :param tags: The tag classes to recognize, indexed by bracketed name;
        the built-in tags are used if no mapping is provided.
    """

    __slots__ = ("tags",)

    tags: dict[str, type[Tag]]
    """Tag classes handed over to the state machine, bound by name."""

    def __init__(self, /, *, tags: dict[str, type[Tag]] | None = None) -> None:
        self.tags = BUILTIN_TAGS if tags is None else tags

    def parse(self, inputstring: str, document: Document) -> None:
        """Populate a document out of a BBCode input string.

        Every entity obtained from the lexer is fed to a fresh state
        machine bound to the document, which is then closed.

        :param inputstring: The input string to parse to obtain the document.
        :param document: The document to populate.
        """
        self.setup_parse(inputstring, document)
        self.lexer = iter_textout_entities(self.inputstring)

        machine = TextoutStateMachine(document=document, tags=self.tags)
        for entity in self.lexer:
            machine.process(entity)
        machine.close()

        self.finish_parse()