2
0
Fork 0

Starts working, todo: work on a[i]k[center]b

This commit is contained in:
Thomas Touhey 2018-02-11 20:49:33 +01:00
parent bc0bcfa65f
commit 65d59c959f
No known key found for this signature in database
GPG Key ID: 2ECEB0517AD947FB
8 changed files with 218 additions and 125 deletions

View File

@ -30,6 +30,26 @@ The supported output types are:
- `lightscript`: Markdown-like language
([official topic on Planète Casio][lstp], [source repository][ls]).
## Tweaks
The `tohtml()` and `tolightscript()` functions can take additional keyword
arguments, called tweaks, that tags can read in order to adapt their
behaviour. Tweak names are case-insensitive and non-alphanumeric characters
are ignored: for example, `label_prefix`, `LABELPREFIX` and
`__LaBeL___PRE_FIX__` are all equivalent.
The following tweaks are read by the translator and built-in tags:
- `label_prefix` (HTML): prefix to be used by the `[label]` and `[target]`
tags, e.g. `msg45529-`. Defaults to `""` for PCv42 compatibility;
- `obsolete_tags` (HTML): use obsolete HTML tags (e.g. `<b>`, `<i>`, …) for
compatibility with old browsers (e.g. lynx). Defaults to `True`.
An example call would be:
#!/usr/bin/env python3
import textoutpc
print(textoutpc.tohtml("Hello, [i]beautiful[/i]!", obsolete__TAGS=False))
## What is left to do
- Manage paragraph and inline tags differently;

View File

@ -42,6 +42,11 @@ __test_cases = {
'<p class="align-justify"><i><b>c</b></i></p>' \
'<p><i>d</i>wouhou</p>',
# Show tag for super preprocessing blocks.
'[quote][show][justify]hehe': \
'<div class="citation"><p>&lt;p class="align-justify"&gt;' \
'hehe&lt;/p&gt;</p></div>',
# Titles.
'lolk[title]smth': '<p>lolk</p>' '<h4>smth</h4>',
'[subtitle]<>': '<h5>&lt;&gt;</h5>',

View File

@ -10,19 +10,20 @@ __all__ = ["version", "tohtml"]
version = "0.1"
def tohtml(message):
def tohtml(message, **tweaks):
""" Converts textout BBcode to HTML.
Receives a string, returns a string. """
return _Translator(_io.StringIO(message), _io.StringIO(), 'html') \
return _Translator(_io.StringIO(message), _io.StringIO(), 'html', tweaks) \
.process().getvalue()
def tolightscript(message):
def tolightscript(message, **tweaks):
""" Converts textout BBcode to Lightscript.
Receives a string, returns a string. """
return "" # TODO: real thing one day
return _Translator(_io.StringIO(message), _io.StringIO(), 'lightscript') \
.process().getvalue()
return _Translator(_io.StringIO(message), _io.StringIO(), 'lightscript', \
tweaks).process().getvalue()
# End of file.

View File

@ -56,12 +56,12 @@ _aliases = {alias: tag for alias, tag in \
# Function to get a tag.
# ---
def get_tag(name, value, output_type = 'html'):
def get_tag(name, value, output_type = 'html', tweaks = {}):
""" Find a tag using its name. """
try:
als = _aliases[name]
als = als(name, value, output_type)
als = als(name, value, output_type, tweaks)
return als
except:
return None

View File

@ -24,12 +24,14 @@ class TextoutTag:
aliases = ()
def __init__(self, name, value, ot):
def __init__(self, name, value, ot, tweaks):
""" Initialize the textout tag with the documented members. """
# Store internal data.
self.__output_type = ot
self.__tweaks = tweaks
self.output_type = ot
# Call both prepare functions.
@ -100,6 +102,12 @@ class TextoutTag:
elif hasattr(self, otype + '_' + ot):
setattr(self, otype, getattr(self, otype + '_' + ot))
def tweak(self, key, default = None):
    """ Read the tweak named `key` from the tweaks given to this tag.
        Returns `default` when looking the key up raises `KeyError`. """

    try:
        value = self.__tweaks[key]
    except KeyError:
        value = default
    return value
# ---
# Role-specific base tag classes.
# ---
@ -116,6 +124,8 @@ class TextoutInlineTag(TextoutTag):
class TextoutParagraphTag(TextoutBlockTag):
""" Main tag for basic paragraphs. """
notempty = True
def begin_html(self):
return '<p>'

View File

@ -5,7 +5,6 @@ import re as _re
__all__ = ["TextoutLabelTag", "TextoutTargetTag"]
_v42compat = True
_labelexpr = _re.compile('^[a-z0-9-]{1,16}$', _re.I)
class TextoutLabelTag(TextoutInlineTag):
@ -27,7 +26,7 @@ class TextoutLabelTag(TextoutInlineTag):
#name = 'label-{}'.format(self._label)
#if _v42compat:
# name += ' ' + self._label
name = self._label if _v42compat else 'label-{}'.format(self._label)
name = self.tweak("label_prefix", "") + self._label
return '<a name="{}"></a>'.format(name)
class TextoutTargetTag(TextoutInlineTag):
@ -46,7 +45,7 @@ class TextoutTargetTag(TextoutInlineTag):
def begin_html(self):
#name = 'label-' + self._label
name = self._label if _v42compat else 'label-' + self._label
name = self.tweak("label_prefix", "") + self._label
return '<a href="#{}">'.format(name)
def end_html(self):

View File

@ -81,7 +81,7 @@ class TextoutTextTag(TextoutInlineTag):
elif value == 'small':
self._size = _sml_size
else:
self._size = int(value) / 100.0
self._size = round(int(value) / 100.0, 2)
assert 0 < self._size <= 3.0
if self._size == 1.0:
@ -91,37 +91,70 @@ class TextoutTextTag(TextoutInlineTag):
else:
self._color = get_color(name)
def begin_html(self):
props = []
def _get_css(self):
""" Get the `style` CSS classes and properties for HTML output. """
classes, props = [], []
if not self.tweak('obsolete_tags', True):
if self._bold:
props.append('font-weight: bold')
if self._italic:
props.append('font-style: italic')
if self._underline or self._strike:
props.append('text-decoration:{}{}'.format(' underline' \
if self._underline else '', ' line-through' \
if self._strike else ''))
if self._font:
props.append('font-family: ' + self._font)
if self._color:
# always append the #rgb color: it will be read by older
# browsers if the `rgba()` function isn't supported.
props.append('color: #%02X%02X%02X' % self._color[0:3])
if self._color[3] < 1.0:
props.append('color: rgba({}, {}, {}, {})' \
.format(*self._color))
if self._color:
# `transparent` is at least considered as a special value,
# or at most as an alias to `rgba(0,0,0,0)`.
if self._color[3] == 0.0:
props.append('color: transparent')
else:
# always append the #rgb color: it will be read by older
# browsers if the `rgba()` function isn't supported.
props.append('color: #%02X%02X%02X' % self._color[0:3])
if self._color[3] < 1.0:
props.append('color: rgba({}, {}, {}, {})' \
.format(*self._color))
if self._size:
props.append('font-size: {}em'.format(self._size))
self._has_props = bool(props)
props = '<span style="{}">'.format('; '.join(props)) if props else ''
return classes, props
def begin_html(self):
obsoletetags = self.tweak('obsolete_tags', True)
cls, props = self._get_css()
if cls or props:
props = '<span{}{}>'.format(' class="{}"'.format(' '.join(cls)) \
if cls else '', ' style="{}"'.format('; '.join(props)) \
if props else '')
else:
props = ''
return '' \
+ ('', '<b>')[self._bold] \
+ ('', '<i>')[self._italic] \
+ ('', '<u>')[self._underline] \
+ ('', '<strike>')[self._strike] \
+ ('', '<b>')[obsoletetags and self._bold] \
+ ('', '<i>')[obsoletetags and self._italic] \
+ ('', '<u>')[obsoletetags and self._underline] \
+ ('', '<strike>')[obsoletetags and self._strike] \
+ props
def end_html(self):
obsoletetags = self.tweak('obsolete_tags', True)
return '' \
+ ('', '</span>')[self._has_props] \
+ ('', '</strike>')[self._strike] \
+ ('', '</u>')[self._underline] \
+ ('', '</i>')[self._italic] \
+ ('', '</b>')[self._bold]
+ ('', '</span>')[any(self._get_css())] \
+ ('', '</strike>')[obsoletetags and self._strike] \
+ ('', '</u>')[obsoletetags and self._underline] \
+ ('', '</i>')[obsoletetags and self._italic] \
+ ('', '</b>')[obsoletetags and self._bold]
# End of file.

View File

@ -3,7 +3,7 @@
See the `Translator` class documentation for more information.
"""
import regex as _re
import regex as _re, string as _string
from copy import deepcopy as _deepcopy
from html import escape as _htmlescape
from .tags import TextoutInlineTag, TextoutBlockTag, \
@ -14,6 +14,27 @@ from .urls import htmlurls as _htmlurls
__all__ = ["Translator"]
# ---
# Tweaks interface.
# ---
class _TweaksDictionary:
    """ Tweaks dictionary. Read-only, and makes sure to match equivalent
        tweak keywords, e.g. `label_prefix`, `LABELPREFIX` and
        `__LaBeL___PRE_FIX__`.

        Keys are normalized both when the dictionary is built and when it
        is read, so any equivalent spelling of a keyword finds the value
        stored under it. """

    # Characters kept when normalizing a tweak name: ASCII letters and
    # digits; everything else is ignored.
    _kept = frozenset(_string.ascii_letters + _string.digits)

    def __normalize(self, name):
        """ Drop the ignored characters from `name` and lowercase it. """

        return ''.join(c for c in name if c in self._kept).lower()

    def __init__(self, base):
        """ Copy the `base` mapping, storing each value under the
            normalized form of its keyword. """

        self.__elts = {}
        for kw in base:
            self.__elts[self.__normalize(kw)] = base[kw]

    def __getitem__(self, key):
        """ Get the value of the tweak named `key`, whatever its spelling.
            Raises `KeyError` if the tweak has not been set. """

        # Only strings can name a tweak; report anything else as a
        # missing key rather than failing inside the normalization.
        if not isinstance(key, str):
            raise KeyError(key)

        # The lookup has to use the same normalization as the storage,
        # otherwise keys such as `label_prefix` never match.
        return self.__elts[self.__normalize(key)]
# ---
# Tag data utility.
# ---
@ -71,7 +92,8 @@ class _TagData:
self.raw = bool(tag.raw) if hasattr(tag, 'raw') \
else hasattr(tag, 'preprocess')
self.super = bool(tag.superblock) if hasattr(tag, 'superblock') \
self.super = True if hasattr(tag, 'preprocess') else \
bool(tag.superblock) if hasattr(tag, 'superblock') \
else False
# Content processing utilities.
@ -95,13 +117,15 @@ class Translator:
You can even chain calls as the `process()` method returns
the output stream object. """
def __init__(self, inp, outp, output_type = 'html'):
def __init__(self, inp, outp, output_type = 'html', tweaks = {}):
""" Initializer. """
if output_type != 'html':
if not output_type in ('html', 'lightscript'):
raise Exception("Invalid output type")
self.output_type = output_type
self.tweaks = _TweaksDictionary(tweaks)
self.inp = inp
self.outp = outp
@ -135,7 +159,7 @@ class Translator:
self.raw_deg = 0
# ---
# Text and code outputting utilities.
# Text outputting utilities.
# ---
def process_text_group(self):
@ -155,6 +179,64 @@ class Translator:
return text
def put_text(self, text):
    """ Buffer some text to be output later. """

    # The text is only kept while content is not being ignored (ignored
    # content is neither used nor output); `flush_text()` processes the
    # buffered group afterwards.
    if self.cign <= 0:
        self.text_group = self.text_group + text
def flush_text(self):
    """ Flush the text that has been output.

        The buffered `text_group` is appended to the first queue entry
        whose `last` member is not a boolean; when there is no such
        entry, it is processed and written to the output stream instead.
        The buffer is emptied in both cases. Nothing happens if the
        buffer is empty or if content is being ignored (`cign > 0`). """

    # First of all, check if the text group is empty or if we want to
    # ignore it.
    if not self.text_group or self.cign > 0:
        return

    # The last queue is composed of booleans (does the group contain
    # something or not) and texts for content processing.
    # We want to set all of the booleans to True until the first text
    # group, to which we want to add the current text.
    # If there is no content preprocessing and we have to output it,
    # we want to start the tags first: `dat is None` will be our signal!
    for dat in self.queue:
        if isinstance(dat.last, bool):
            dat.last = True
            continue

        dat.last += self.text_group
        break
    else:
        # The text has to be processed before the buffer is reset below.
        dat = None
        text = self.process_text_group()

    # Reset `text_group`, as its content has been used somewhere
    # (unbuffer data).
    self.text_group = ""

    # Start the tags that haven't been started, and stuff.
    self.start_tags()

    # If the content has to be written, we ought to.
    if dat is None:
        self.outp.write(text)
# ---
# Code outputting utilities.
# ---
def put_code(self, code):
""" Put some code. """
@ -195,10 +277,25 @@ class Translator:
def put_newline(self):
""" Put a newline. """
# As in `flush_text()`, the last queue is composed of booleans.
# We want to set all of the booleans to True until the first text
# group, to which we want to add the current text.
# If there is no content preprocessing and we have to output it,
# we want to start the tags first: `dat == None` will be our signal!
for dat in self.queue:
if isinstance(dat.last, bool):
dat.last = True
continue
dat.last += code
break
else:
dat = None
# If we ought to put a newline, that means that the paragraph content
# is starting and that we might have to put the start of paragraph.
self.start_block()
self.start_tags()
# The newline depends on the output type and the context, of course.
@ -211,59 +308,6 @@ class Translator:
self.put_code(newline)
def put_text(self, text):
""" Output some text. """
# If we want to ignore the content (because it is not used
# nor output), let the text fall into the void.
if self.cign > 0:
return
# Add to the text group, which will be processed when `flush_text()`
# is used.
self.text_group += text
def flush_text(self):
""" Flush the text that has been output. """
# First of all, check if the text group is empty or if we want to
# ignore it.
if not self.text_group or self.cign > 0:
return
# The last queue is composed of booleans (does the group contain
# something or not) and texts for content processing.
# We want to set all of the booleans to True until the first text
# group, to which we want to add the current text.
# If there is no content preprocessing and we have to output it,
# we want to start the tags first: `dat == None` will be our signal!
for dat in self.queue:
if isinstance(dat.last, bool):
dat.last = True
continue
dat.last += self.text_group
break
else:
dat = None
# Start the tags that haven't been started, and stuff.
self.start_tags()
# If the content has to be written, we ought to.
if dat == None:
self.outp.write(self.process_text_group())
# Don't forget to reset the `text_group`, as its content has been
# used somewhere (unbuffer data).
self.text_group = ""
# ---
# Tag queue management.
# ---
@ -363,8 +407,8 @@ class Translator:
# Output the beginning and the content. If there was no content,
# just put the content that we got earlier.
dat.started = True
if hasattr(tag, 'begin'):
dat.started = True
self.put_code(tag.begin())
if hasattr(tag, 'content'):
@ -402,7 +446,14 @@ class Translator:
# Don't forget to end the tag!
self.end_last_tag()
if dat.notempty and not dat.started:
pass
else:
if hasattr(tag, 'end'):
self.put_code(tag.end())
if dat.type == dat.BLOCK:
self.end_block()
# Disable raw mode if it was a raw tag (which means that it enabled it,
# as tags into raw tags cannot be processed).
@ -446,7 +497,8 @@ class Translator:
for dat in blocks + inlines:
dat.started = True
self.put_code(dat.tag.begin())
if hasattr(dat.tag, 'begin'):
self.put_code(dat.tag.begin())
def end_block(self):
""" End the current block. """
@ -462,10 +514,8 @@ class Translator:
if not isinstance(dat.last, bool) or dat.super: break
# Then put the tag in the appropriate queue, and set it as
# unstarted for safety reasons.
# Then put the tag in the appropriate queue.
dat.started = False
if dat.type == dat.BLOCK:
blocks.append(dat)
else:
@ -483,32 +533,6 @@ class Translator:
dat.started = False
dat.last = False
def end_last_tag(self):
""" End the latest tag entered in the queue. """
if not self.queue:
return
# If the tag hasn't been started, then it shouldn't be ended.
dat = self.queue[0]
if not dat.started:
return
# If it is not a block, then we should just end it like that.
if dat.type != dat.BLOCK:
dat.started = False
tag = dat.tag
if hasattr(tag, 'end'):
self.put_code(tag.end())
return
# If we have arrived there, then the tag to end is a block.
# We want to end the whole current block.
self.end_block()
# ---
# Main function.
# ---
@ -520,7 +544,7 @@ class Translator:
# Other blocks will supplant this by being further in the queue.
self.push_tag(_TagData(TextoutParagraphTag(None, None,
self.output_type), None, ''))
self.output_type, self.tweaks), None, ''))
# We want to get our elements out of the element stream (Lephe
# told me that the `TextoutStream` class was actually a lexer,
@ -621,7 +645,8 @@ class Translator:
# Get the initialized tag with the name and value.
# If the tag is unknown, output the full thing and just go on.
tag = get_tag(tagdata.name, tagdata.value, self.output_type)
tag = get_tag(tagdata.name, tagdata.value, self.output_type,
self.tweaks)
if not tag:
self.put_text(tagdata.full)
continue