2
0
Fork 0
textout/textoutpc/translate.py

706 lines
19 KiB
Python
Executable File

#!/usr/bin/env python3
#******************************************************************************
# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
# This file is part of the textoutpc project, which is MIT-licensed.
#******************************************************************************
""" Main translation function.
See the `Translator` class documentation for more information.
"""
import string as _string
from copy import deepcopy as _deepcopy
from html import escape as _htmlescape
from importlib import import_module as _importmod
from .tags import TextoutBlockTag as _TextoutBlockTag, \
TextoutParagraphTag as _TextoutParagraphTag, TextoutTags as _Tags
from .stream import TextoutStream as _TextoutStream
from .smileys import htmlsmileys as _htmlsmileys
from .urls import htmlurls as _htmlurls
__all__ = ["Translator"]
# Builtin tags, loaded once when this module is imported and used as the
# default tag set of `Translator`.
# The module name is relative: it is resolved against `__name__`, so the two
# leading dots climb out of this module into its parent package.
# NOTE(review): presumably this ends up importing `textoutpc.builtin_tags`;
# confirm against the package layout.
_builtin_tags = _Tags(_importmod('..builtin_tags', __name__))
# ---
# Tweaks interface.
# ---
class _TweaksDictionary:
    """ Tweaks dictionary. Read-only, and makes sure to match equivalent
    tweak keywords, e.g. `label_prefix`, `LABELPREFIX` and
    `__LaBeL___PRE_FIX__`.

    Keywords are normalized both when the dictionary is built and when
    it is queried: every character which is not an ASCII letter is
    dropped, and the remaining letters are lowercased. """

    def __init__(self, base):
        """ Build the dictionary out of the `base` mapping.

        If several keywords of `base` normalize to the same name, the
        last one met while iterating over `base` wins. """

        self.__elts = {self.__normalize(kw): base[kw] for kw in base}

    def __normalize(self, name):
        """ Get the canonical form of the `name` tweak keyword. """

        letters = _string.ascii_letters
        return ''.join(c for c in name if c in letters).lower()

    def __getitem__(self, key):
        """ Get the value of the tweak named `key`.

        The key is normalized the same way the stored keywords are, so
        that any equivalent spelling finds the tweak.
        Raises `KeyError` if no such tweak has been defined. """

        return self.__elts[self.__normalize(key)]
# ---
# Tag data utility.
# ---
class _TagData:
    """ Container for an opened tag, as stored in the translator's queue.

    It keeps the tag object together with the flags calculated from the
    tag's attributes and the state of the content processing. """

    BLOCK = 1
    INLINE = 2

    def __init__(self, tag, name, full):
        """ Tag data initialization.

        Here, we prepare all of the attributes from the tag's
        after-preparation attributes.

        `tag` is the tag object, `name` is the name through which the
        tag has been called (None for tags which have not been called
        by name) and `full` is the full tag beginning mark. """

        preprocesses = hasattr(tag, 'preprocess')

        # Identity of the tag.
        # `base` is the pristine tag object, of which `reset()` makes
        # the `tag` working copy.

        self.name = name
        self.full = full
        self.base = tag
        self.type = self.BLOCK if isinstance(tag, _TextoutBlockTag) \
            else self.INLINE

        # Tag beginning displaying.
        # `notempty` is whether the tag shall only be displayed once it
        #   has some content (e.g. to avoid `<p></p>`).
        # `started` is whether the tag's beginning has been processed.

        self.notempty = bool(getattr(tag, 'notempty', False))
        self.started = False

        # Flags and properties calculated from the tag's attributes, using
        # the rules given in `TAGS.md`.
        # `ign` is whether the content should be ignored while the tag
        #   is opened: the tag provides its own content and does not
        #   preprocess what has been written.
        # `generic` is whether the tag can be terminated by the generic
        #   tag ending mark [/]. Tags without a name never can.
        # `raw` is whether the tag's content should be read as raw,
        #   which is the default for tags preprocessing their content.
        # `super` is whether the tag is a super block, which is always
        #   the case for tags preprocessing their content.

        self.ign = not preprocesses and hasattr(tag, 'content')
        self.generic = False if name is None \
            else bool(getattr(tag, 'generic', True))
        self.raw = bool(getattr(tag, 'raw', preprocesses))
        self.super = preprocesses or bool(getattr(tag, 'superblock', False))

        # Content processing utilities.
        # `last` is the content of the tag. A boolean indicates that we
        # only want to know if the content is empty or not, and a string
        # means we want to get the full content to re-use it later.
        # In order not to manage a third case, even if the tag doesn't care
        # if its content is empty or not, this property is a boolean.

        self.last = "" if preprocesses else False

        # Reset the tag.

        self.reset()

    def reset(self):
        """ Reset the tag, generally because it has been closed.

        The working copy of the tag is replaced by a fresh deep copy of
        the base tag, and the content is emptied while keeping its kind
        (boolean or string). """

        self.tag = _deepcopy(self.base)
        self.started = False
        self.last = False if isinstance(self.last, bool) else ""

    def __repr__(self):
        return '<TagData>'
# ---
# Translator main class.
# ---
class Translator:
    """ One-time usage class for translating.

    Use it this way: `Translator(my_inp, my_outp).process()`.
    You can even chain calls as the `process()` method returns
    the output stream object. """

    def __init__(self, inp, outp, output_type='html', tweaks=None,
                 tags=None):
        """ Initializer.

        `inp` is what the element stream is built from in `process()`.
        `outp` is the output object, of which only `write()` is used.
        `output_type` is either 'html' or 'lightscript'.
        `tweaks` is a mapping of tweaks (none by default).
        `tags` is the tag set to look tags up in (the builtin tags by
        default).

        Raises `ValueError` if the output type is invalid. """

        if output_type not in ('html', 'lightscript'):
            raise ValueError("Invalid output type")

        self.output_type = output_type
        self.tweaks = _TweaksDictionary({} if tweaks is None else tweaks)
        self.tags = _builtin_tags if tags is None else tags

        self.inp = inp
        self.outp = outp

        # `queue` is the queue of tag containers, with the actual tag
        # objects, calculated tag properties, variables for content
        # processing, and other stuff. The most recently opened tag is
        # at index 0.
        # `cign` is the number of tags requiring the content to be ignored.

        self.queue = []
        self.cign = 0

        # Text group management.
        # In the following example text:
        #
        #   some [incredible] text [align=center] you know
        #
        # There are two input groups, what's before and what's after the
        # valid `[align=center]` tag. We want to flush the text in two steps
        # only, in order to detect things such as URLs and smileys.
        #
        # The text group also manages the invalid tags, to manage URLs with
        # brackets in it, e.g. https://example.org/[some-incredible-thing]-yea

        self.text_group = ""

        # `raw_mode` is whether the no evaluating mode is on or not.
        # `raw_deg` is the number of times the raw tag has to be closed
        #   to exit.

        self.raw_mode = False
        self.raw_deg = 0

    # ---
    # Text outputting utilities.
    # ---

    def process_text_group(self):
        """ Process the text group for naked URLs and stuff.

        Returns the text to output for the current text group; the text
        group itself is left untouched. """

        text = self.text_group

        # Only the HTML output gets transformed; for any other output
        # type, the text is returned as is.

        if self.output_type == 'html':
            # In all cases, we want to escape for HTML things, so that the
            # user doesn't insert raw HTML tags (which would be a security
            # flaw!).

            text = _htmlescape(text)

            # For non-raw HTML, we want to add smiley and URLs conversion,
            # because it's nicer!

            if not self.raw_mode:
                text = _htmlsmileys(_htmlurls(text))

        return text

    def put_text(self, text):
        """ Output some text. """

        # If we want to ignore the content (because it is not used
        # nor output), let the text fall into the void.

        if self.cign > 0:
            return

        # Add to the text group, which will be processed when `flush_text()`
        # is used.

        self.text_group += text

    def _record_content(self, piece):
        """ Register `piece` in the content processing queue.

        The `last` property of the queued tags is either a boolean (does
        the tag contain something or not) or a string (for content
        preprocessing). All of the booleans are set to True until the
        first string, to which `piece` is added.

        Returns True if a tag has captured the piece for preprocessing,
        in which case it shall not be written to the output, and False
        otherwise. """

        for dat in self.queue:
            if isinstance(dat.last, bool):
                dat.last = True
                continue

            dat.last += piece
            return True

        return False

    def flush_text(self):
        """ Flush the text that has been output. """

        # First of all, check if the text group is empty or if we want to
        # ignore it.

        if not self.text_group or self.cign > 0:
            return

        # Register the raw text group as content, then unbuffer it before
        # starting the tags: `start_tags()` ends up calling this method
        # again, and the text shall not be flushed twice.

        captured = self._record_content(self.text_group)
        text = self.process_text_group()
        self.text_group = ""

        # Start the tags that haven't been started, and stuff.

        self.start_tags()

        # If the content has to be written, we ought to.

        if not captured:
            self.outp.write(text)

    # ---
    # Code outputting utilities.
    # ---

    def put_code(self, code):
        """ Put some code. """

        # We don't want to mix text and code, so we'll flush to be sure that
        # the order doesn't get mixed up.

        self.flush_text()

        # First of all, check if the code is empty or if we want to
        # ignore it.

        if not code or self.cign > 0:
            return

        # As in `flush_text()`, register the code as content and start
        # the tags that haven't been started before writing anything.

        captured = self._record_content(code)
        self.start_tags()

        # If the content has to be written, we ought to.

        if not captured:
            self.outp.write(code)

    def put_newline(self):
        """ Put a newline. """

        # The newline depends on the output type and the context, of course.

        if self.output_type == 'html' and not self.raw_mode:
            newline = '<br />\n'
        else:
            newline = '\n'

        # Then put this as one puts code.

        self.put_code(newline)

    # ---
    # Tag queue management.
    # ---

    def push_tag(self, dat):
        """ Push a tag onto the tag stack. """

        # If the tag does not process its content but replaces the content,
        # that means the content is ignored.
        # NOTE(review): `cign` is raised before the flush below, and
        # `flush_text()` returns early while `cign` is positive, so text
        # buffered before such a tag stays buffered until the tag is
        # popped. Confirm this is intended.

        if dat.ign:
            self.cign += 1

        # If we're about to put a tag or anything, empty the text block
        # here.

        self.flush_text()

        # If it is a block, end the current block.

        if dat.type == dat.BLOCK:
            self.end_block()

        # Insert the tag into the queue.

        self.queue.insert(0, dat)

        # Start the tag (and parent tags) if required.

        self.start_tags()

        # Don't forget to enable raw mode if the tag expects a raw content
        # (e.g. `[code]`).

        if dat.raw:
            self.raw_mode = True
            self.raw_deg = 0

    def _reject_tag(self, dat, *pieces):
        """ Give up on the just-popped `dat` tag, which proved invalid.

        The `pieces` (tag marks and content) are sent back to the text
        group, and raw mode is left if the tag had enabled it: the tag
        is no longer in the queue, so nothing could close it later. """

        for piece in pieces:
            self.put_text(piece)

        if dat.raw:
            self.raw_mode = False

    def pop_tag(self, end=""):
        """ Pop a tag from the tag stack.

        `end` represents the full version of the ending tag marker,
        for displaying if the tag is invalid. """

        # Even if we had no beginning, no content and no end, what is
        # here has to be distinguished from what was right before!
        # So we need to flush the text group for this.
        # (this will probably be useless for tags with preprocessing enabled,
        # but that's okay, flushing doesn't modify the content processing
        # queue)

        self.flush_text()

        # Pop the tag out of the queue.

        dat = self.queue.pop(0)
        tag = dat.tag

        if hasattr(tag, 'preprocess'):
            # Preprocessing has been enabled, so we ought to process the
            # content, check if the tag is valid, and do everything we
            # would have done while pushing the tag if it didn't do content
            # processing.
            #
            # Take out the content of the content preprocessing queue.
            # If there is no content and the tag proposes a default content,
            # let's use it instead.

            content = dat.last
            if not content and hasattr(tag, 'default'):
                try:
                    content = tag.default()
                except Exception:
                    # The tag is not supposed to have an empty content,
                    # so we ought to put it as an invalid tag and go on.

                    self._reject_tag(dat, dat.full, end)
                    return

            # Send the content to the tag while checking its validity (by
            # checking if the `preprocess()` method raises an exception).

            try:
                ct = tag.preprocess(content)
            except Exception:
                # The tag is invalid in the end, so we ought to send the
                # raw things to the text group and forget about the tag.

                self._reject_tag(dat, dat.full, content, end)
                return

            # If we're here, congrats, the tag is valid! Now, if the
            # `preprocess()` method returned something different, we
            # want to use it instead.

            if ct is not None:
                content = ct

            # Output the beginning and the content. If the tag does not
            # provide a content, just put the content that we got earlier.

            dat.started = True
            if hasattr(tag, 'begin'):
                self.put_code(tag.begin())

            if hasattr(tag, 'content'):
                self.put_code(tag.content())
            elif dat.raw:
                # XXX: I'm unsure about this. Shall raw tags return code
                # or text? The text will only be escaped as raw mode is
                # still enabled at this point.

                self.put_text(content)
            else:
                self.put_code(content)
        elif hasattr(tag, 'content'):
            # Tag replaces content without preprocessing, which means
            # the content has been ignored and the tag only puts the
            # things.

            self.cign -= 1
            self.put_code(tag.content())
        elif hasattr(tag, 'default') and not dat.last:
            # Tag defines a default content if there might be none,
            # without text preprocessing, and there is no content: print it.
            # Notice that the default content method can also raise
            # an exception if the tag in its current configuration should
            # not have an empty content.

            try:
                default = tag.default()
            except Exception:
                # The tag is not supposed to have empty content!
                # Let's put the raw things again as when there is
                # content processing.

                self._reject_tag(dat, dat.full, end)
                return

            self.put_text(default)

        # Don't forget to end the tag!

        if dat.notempty and not dat.started:
            # The tag never got any content, so it has never been begun.
            pass
        elif dat.type != dat.BLOCK or dat.super:
            if hasattr(tag, 'end'):
                self.put_code(tag.end())
        else:
            # Normal blocks are ended through `end_block()`, which needs
            # the tag to be in the queue.

            self.queue.insert(0, dat)
            self.end_block()
            self.queue.pop(0)

        # Disable raw mode if it was a raw tag (which means that it enabled
        # it, as tags into raw tags cannot be processed).

        if dat.raw:
            self.raw_mode = False

    # ---
    # Automatically start and end tags.
    # ---

    def start_tags(self):
        """ Start the tags that haven't been started yet.

        This is usually called when content is output, for tags that
        aren't empty. """

        # First, get the references to the block and inline tags that need
        # to be started, in the order opposite to the queue's.

        blocks = []
        inlines = []

        for dat in self.queue:
            # Stop at the first tag calling for content processing, or
            # at the first tag which shall not be displayed yet as it
            # is still empty.

            if not isinstance(dat.last, bool):
                break
            if dat.notempty and not dat.last:
                break

            # Tags that have already been started are just skipped.

            if dat.started:
                continue

            # Then put the tag in the appropriate list.

            if dat.type == dat.BLOCK:
                blocks.insert(0, dat)
            else:
                inlines.insert(0, dat)

        # Among the blocks, only select the super blocks, plus the first
        # block of the list if it is not a super block.

        selected = [block for block in blocks if block.super]
        if blocks and not blocks[0].super:
            selected.insert(0, blocks[0])

        # Then, put the tag beginnings.
        # NOTE(review): `put_code()` calls this method again before it
        # writes anything, at a moment where the tags coming later in
        # this list are not marked as started yet. Confirm a beginning
        # cannot be put twice that way.

        for dat in selected + inlines:
            dat.started = True
            if hasattr(dat.tag, 'begin'):
                self.put_code(dat.tag.begin())

    def end_block(self):
        """ End the current block. """

        # Keep the original queue, to restore it in the end.

        queue = self.queue.copy()

        # We want to collect the started inline and block tags, up to the
        # first super block (excluded).

        blocks = []
        inlines = []

        for dat in self.queue:
            if dat.super:
                break
            if not dat.started:
                continue

            if dat.type == dat.BLOCK:
                blocks.append(dat)
            else:
                inlines.append(dat)

        # Then we want to end the tags, and reset them in case we're going
        # to use them. The queue is temporarily replaced by the tags to
        # end, so that `put_code()` only sees the tags left to end.

        self.queue = inlines[::-1] + blocks[::-1]
        while self.queue:
            dat = self.queue.pop(0)
            tag = dat.tag
            if hasattr(tag, 'end'):
                self.put_code(tag.end())
            dat.reset()

        # Restore the queue.

        self.queue = queue

    # ---
    # Main function.
    # ---

    def _push_paragraph(self):
        """ Push a new anonymous paragraph tag onto the tag stack. """

        tag = _TextoutParagraphTag(None, None, self.output_type, self.tweaks)
        self.push_tag(_TagData(tag, None, ''))

    def process(self):
        """ Main function of the textout translator.

        Reads the elements out of the input, writes the translation to
        the output object and returns the latter. """

        # By default, everything is in a paragraph.
        # Other blocks will supplant this by being further in the queue.

        self._push_paragraph()

        # We want to get our elements out of the element stream (Lephe
        # told me that the `TextoutStream` class was actually a lexer,
        # but as I don't know the theory behind this...).

        for element in _TextoutStream(self.inp):
            # If it is a string or a newline, let's just put it.
            # Otherwise, the element is some tag data or at least something
            # that requires some special processing.

            if isinstance(element, str):
                self.put_text(element)
                continue

            tagdata = element
            if tagdata.type == tagdata.NEWLINE:
                self.put_newline()
                continue

            # XXX: As we don't manage paragraphs for now, end of lines and
            # paragraphs separator are just output for now.

            if tagdata.type not in (tagdata.BEGIN, tagdata.END,
                                    tagdata.SPECIAL):
                self.put_text(tagdata.full)
                continue

            # Check if it is a tag end (we do not know for special tags,
            # as they usually are one-character long).

            if tagdata.type in (tagdata.END, tagdata.SPECIAL):
                # If raw mode is activated, that means that the queue is not
                # empty and that the top tag of the queue is the tag that
                # initiated raw mode. We're just going to check that the
                # name corresponds, and that the tag has not been opened
                # into itself (see the description of `raw_deg` in the
                # initializer).

                if self.raw_mode:
                    top = self.queue[0]
                    if tagdata.name != top.name \
                            and not (tagdata.name == "[]" and top.generic):
                        self.put_text(tagdata.full)
                        continue
                    if self.raw_deg > 0:
                        self.put_text(tagdata.full)
                        self.raw_deg -= 1
                        continue

                # Check to which opened tag the ending tag corresponds.

                pos = -1
                if tagdata.name == "[]":
                    # Generic closing tag [/] management: it corresponds
                    # to the most recently opened tag which has a name.

                    for qpos, qdat in enumerate(self.queue):
                        if qdat.name is not None:
                            pos = qpos
                            break
                else:
                    # Get the position of the most recently opened tag
                    # with that name.

                    for qpos, qdat in enumerate(self.queue):
                        if tagdata.name == qdat.name:
                            pos = qpos
                            break

                # Then react to `pos`.
                # If `pos` is 0 or above, an opening tag has been found.
                # We ought to autoclose opened stuff which are not
                # terminated explicitly, and close the tag closed
                # explicitly.

                if pos >= 0:
                    while pos > 0:
                        self.pop_tag()
                        pos -= 1
                    self.pop_tag(tagdata.full)
                    continue

                # An ending tag which ends nothing is just text.

                if tagdata.type == tagdata.END:
                    self.put_text(tagdata.full)
                    continue

                # If we are here, the tag is a special tag which hasn't been
                # identified to be an ending tag. We don't want to stop
                # because that means it is a beginning tag.

            # From here, we know the tag is a beginning tag.
            # In raw mode, always display the tag, but if the tag corresponds
            # to the raw tag opened, augment the number of tags required to
            # close the raw tag.

            if self.raw_mode:
                if tagdata.name == self.queue[0].name:
                    self.raw_deg += 1
                self.put_text(tagdata.full)
                continue

            # Get the initialized tag with the name and value.
            # If the tag is unknown, output the full thing and just go on.

            tag = self.tags.get_tag(tagdata.name, tagdata.value,
                                    self.output_type, self.tweaks)
            if not tag:
                self.put_text(tagdata.full)
                continue

            # And don't forget to push the tag.

            dat = _TagData(tag, tagdata.name, tagdata.full)
            self.push_tag(dat)

            # Push a paragraph tag if the block is a superblock.

            if dat.type == dat.BLOCK and dat.super:
                self._push_paragraph()

        # End of file, it seems! Let's close the tags, flush the text
        # and just resume our lives from there.

        while self.queue:
            self.pop_tag()
        self.flush_text()

        # And don't forget to return the output for the user to chain
        # stuff easily ;)

        return self.outp
# End of file.