Starts working, todo: work on a[i]k[center]b

2018-02-11 20:49:33 +01:00 · 2018-02-11 20:49:33 +01:00 · 65d59c959f
parent bc0bcfa65f
commit 65d59c959f
8 changed files with 218 additions and 125 deletions
--- a/README.md
+++ b/README.md
@ -30,6 +30,26 @@ The supported output types are:
 - `lightscript`: Markdown-like language
  ([official topic on Planète Casio][lstp], [source repository][ls]).

+## Tweaks
+The `tohtml()` and `tolightscript()` functions accept additional keyword
+arguments, called tweaks, that tags can read to adapt their behaviour. Tweak
+names are case-insensitive and non-alphanumeric characters are ignored: e.g.
+`label_prefix`, `LABELPREFIX` and `__LaBeL___PRE_FIX__` are all equivalent.
+
+The following tweaks are read by the translator and built-in tags:
+
+- `label_prefix` (HTML): prefix to be used by the `[label]` and `[target]`
+  tags, e.g. `msg45529-`. Defaults to `""` for PCv42 compatibility;
+- `obsolete_tags` (HTML): use obsolete HTML tags (e.g. `<b>`, `<i>`, …) for
+  compatibility with old browsers (e.g. lynx). Defaults to `True`.
+
+An example call would be:
+
+	#!/usr/bin/env python3
+	import textoutpc
+	
+	print(textoutpc.tohtml("Hello, [i]beautiful[/i]!", obsolete__TAGS=False))
+
 ## What is left to do

 - Manage paragraph and inline tags differently;
--- a/test/test_html.py
+++ b/test/test_html.py
@ -42,6 +42,11 @@ __test_cases = {
 		'<p class="align-justify"><i><b>c</b></i></p>' \
 		'<p><i>d</i>wouhou</p>',

+	# Show tag for super preprocessing blocks.
+	'[quote][show][justify]hehe': \
+		'<div class="citation"><p>&lt;p class="align-justify"&gt;' \
+		'hehe&lt;/p&gt;</p></div>',
+
 	# Titles.
 	'lolk[title]smth': '<p>lolk</p>' '<h4>smth</h4>',
 	'[subtitle]<>': '<h5>&lt;&gt;</h5>',
--- a/textoutpc/init.py
+++ b/textoutpc/init.py
@ -10,19 +10,20 @@ __all__ = ["version", "tohtml"]

 version = "0.1"

-def tohtml(message):
+def tohtml(message, **tweaks):
 	""" Converts textout BBcode to HTML.
 		Receives a string, returns a string. """

-	return _Translator(_io.StringIO(message), _io.StringIO(), 'html') \
+	return _Translator(_io.StringIO(message), _io.StringIO(), 'html', tweaks) \
 		.process().getvalue()

-def tolightscript(message):
+def tolightscript(message, **tweaks):
 	""" Converts textout BBcode to Lightscript.
 		Receives a string, returns a string. """

 	return "" # TODO: real thing one day
-	return _Translator(_io.StringIO(message), _io.StringIO(), 'lightscript') \
-		.process().getvalue()
+
+	return _Translator(_io.StringIO(message), _io.StringIO(), 'lightscript', \
+		tweaks).process().getvalue()

 # End of file.
--- a/textoutpc/tags/init.py
+++ b/textoutpc/tags/init.py
@ -56,12 +56,12 @@ _aliases = {alias: tag for alias, tag in \
 # Function to get a tag.
 # ---

-def get_tag(name, value, output_type = 'html'):
+def get_tag(name, value, output_type = 'html', tweaks = {}):
 	""" Find a tag using its name. """

 	try:
 		als = _aliases[name]
-		als = als(name, value, output_type)
+		als = als(name, value, output_type, tweaks)
 		return als
 	except:
 		return None
--- a/textoutpc/tags/base.py
+++ b/textoutpc/tags/base.py
@ -24,12 +24,14 @@ class TextoutTag:

 	aliases = ()

-	def __init__(self, name, value, ot):
+	def __init__(self, name, value, ot, tweaks):
 		""" Initialize the textout tag with the documented members. """

 		# Store internal data.

 		self.__output_type = ot
+		self.__tweaks = tweaks
+
 		self.output_type = ot

 		# Call both prepare functions.
@ -100,6 +102,12 @@ class TextoutTag:
 			elif hasattr(self, otype + '_' + ot):
 				setattr(self, otype, getattr(self, otype + '_' + ot))

+	def tweak(self, key, default = None):
+		try:
+			return self.__tweaks[key]
+		except KeyError:
+			return default
+
 # ---
 # Role-specific base tag classes.
 # ---
@ -116,6 +124,8 @@ class TextoutInlineTag(TextoutTag):
 class TextoutParagraphTag(TextoutBlockTag):
 	""" Main tag for basic paragraphs. """

+	notempty = True
+
 	def begin_html(self):
 		return '<p>'

--- a/textoutpc/tags/builtin/Label.py
+++ b/textoutpc/tags/builtin/Label.py
@ -5,7 +5,6 @@ import re as _re

 __all__ = ["TextoutLabelTag", "TextoutTargetTag"]

-_v42compat = True
 _labelexpr = _re.compile('^[a-z0-9-]{1,16}$', _re.I)

 class TextoutLabelTag(TextoutInlineTag):
@ -27,7 +26,7 @@ class TextoutLabelTag(TextoutInlineTag):
 		#name = 'label-{}'.format(self._label)
 		#if _v42compat:
 		#	name += ' ' + self._label
-		name = self._label if _v42compat else 'label-{}'.format(self._label)
+		name = self.tweak("label_prefix", "") + self._label
 		return '<a name="{}"></a>'.format(name)

 class TextoutTargetTag(TextoutInlineTag):
@ -46,7 +45,7 @@ class TextoutTargetTag(TextoutInlineTag):

 	def begin_html(self):
 		#name = 'label-' + self._label
-		name = self._label if _v42compat else 'label-' + self._label
+		name = self.tweak("label_prefix", "") + self._label
 		return '<a href="#{}">'.format(name)

 	def end_html(self):
--- a/textoutpc/tags/builtin/Text.py
+++ b/textoutpc/tags/builtin/Text.py
@ -81,7 +81,7 @@ class TextoutTextTag(TextoutInlineTag):
 			elif value == 'small':
 				self._size = _sml_size
 			else:
-				self._size = int(value) / 100.0
+				self._size = round(int(value) / 100.0, 2)
 				assert 0 < self._size <= 3.0

 			if self._size == 1.0:
@ -91,37 +91,70 @@ class TextoutTextTag(TextoutInlineTag):
 		else:
 			self._color = get_color(name)

-	def begin_html(self):
-		props = []
+	def _get_css(self):
+		""" Get the `style` CSS classes and properties for HTML output. """
+
+		classes, props = [], []
+
+		if not self.tweak('obsolete_tags', True):
+			if self._bold:
+				props.append('font-weight: bold')
+			if self._italic:
+				props.append('font-style: italic')
+			if self._underline or self._strike:
+				props.append('text-decoration:{}{}'.format(' underline' \
+					if self._underline else '', ' line-through' \
+					if self._strike else ''))
+
 		if self._font:
 			props.append('font-family: ' + self._font)
-		if self._color:
-			# always append the #rgb color: it will be read by older
-			# browsers if the `rgba()` function isn't supported.

-			props.append('color: #%02X%02X%02X' % self._color[0:3])
-			if self._color[3] < 1.0:
-				props.append('color: rgba({}, {}, {}, {})' \
-					.format(*self._color))
+		if self._color:
+			# `transparent` is at least considered as a special value,
+			# or at most as an alias to `rgba(0,0,0,0)`.
+
+			if self._color[3] == 0.0:
+				props.append('color: transparent')
+			else:
+				# always append the #rgb color: it will be read by older
+				# browsers if the `rgba()` function isn't supported.
+
+				props.append('color: #%02X%02X%02X' % self._color[0:3])
+				if self._color[3] < 1.0:
+					props.append('color: rgba({}, {}, {}, {})' \
+						.format(*self._color))
+
 		if self._size:
 			props.append('font-size: {}em'.format(self._size))

-		self._has_props = bool(props)
-		props = '<span style="{}">'.format('; '.join(props)) if props else ''
+		return classes, props
+
+	def begin_html(self):
+		obsoletetags = self.tweak('obsolete_tags', True)
+
+		cls, props = self._get_css()
+		if cls or props:
+			props = '<span{}{}>'.format(' class="{}"'.format(' '.join(cls)) \
+				if cls else '', ' style="{}"'.format('; '.join(props)) \
+				if props else '')
+		else:
+			props = ''

 		return '' \
-			+ ('', '<b>')[self._bold] \
-			+ ('', '<i>')[self._italic] \
-			+ ('', '<u>')[self._underline] \
-			+ ('', '<strike>')[self._strike] \
+			+ ('', '<b>')[obsoletetags and self._bold] \
+			+ ('', '<i>')[obsoletetags and self._italic] \
+			+ ('', '<u>')[obsoletetags and self._underline] \
+			+ ('', '<strike>')[obsoletetags and self._strike] \
 			+ props

 	def end_html(self):
+		obsoletetags = self.tweak('obsolete_tags', True)
+
 		return '' \
-			+ ('', '</span>')[self._has_props] \
-			+ ('', '</strike>')[self._strike] \
-			+ ('', '</u>')[self._underline] \
-			+ ('', '</i>')[self._italic] \
-			+ ('', '</b>')[self._bold]
+			+ ('', '</span>')[any(self._get_css())] \
+			+ ('', '</strike>')[obsoletetags and self._strike] \
+			+ ('', '</u>')[obsoletetags and self._underline] \
+			+ ('', '</i>')[obsoletetags and self._italic] \
+			+ ('', '</b>')[obsoletetags and self._bold]

 # End of file.
--- a/textoutpc/translate.py
+++ b/textoutpc/translate.py
@ -3,7 +3,7 @@
 	See the `Translator` class documentation for more information.
 """

-import regex as _re
+import regex as _re, string as _string
 from copy import deepcopy as _deepcopy
 from html import escape as _htmlescape
 from .tags import TextoutInlineTag, TextoutBlockTag, \
@ -14,6 +14,27 @@ from .urls import htmlurls as _htmlurls

 __all__ = ["Translator"]

+# ---
+# Tweaks interface.
+# ---
+
+class _TweaksDictionary:
+	""" Tweaks dictionary. Read-only, and makes sure to match equivalent
+		tweak keyword, e.g. `label_prefix`, `LABELPREFIX` and
+		`__LaBeL___PRE_FIX__`. """
+
+	def __normalize(self, name):
+		return ''.join(c for c in name if c in _string.ascii_letters).lower()
+
+	def __init__(self, base):
+		self.__elts = {}
+
+		for kw in base:
+			self.__elts[self.__normalize(kw)] = base[kw]
+
+	def __getitem__(self, key):
+		return self.__elts[key]
+
 # ---
 # Tag data utility.
 # ---
@ -71,7 +92,8 @@ class _TagData:
 		self.raw = bool(tag.raw) if hasattr(tag, 'raw') \
 			else hasattr(tag, 'preprocess')

-		self.super = bool(tag.superblock) if hasattr(tag, 'superblock') \
+		self.super = True if hasattr(tag, 'preprocess') else \
+			bool(tag.superblock) if hasattr(tag, 'superblock') \
 			else False

 		# Content processing utilities.
@ -95,13 +117,15 @@ class Translator:
 		You can even chain calls as the `process()` method returns
 		the output stream object. """

-	def __init__(self, inp, outp, output_type = 'html'):
+	def __init__(self, inp, outp, output_type = 'html', tweaks = {}):
 		""" Initializer. """

-		if output_type != 'html':
+		if not output_type in ('html', 'lightscript'):
 			raise Exception("Invalid output type")
 		self.output_type = output_type

+		self.tweaks = _TweaksDictionary(tweaks)
+
 		self.inp = inp
 		self.outp = outp

@ -135,7 +159,7 @@ class Translator:
 		self.raw_deg = 0

 	# ---
-	# Text and code outputting utilities.
+	# Text outputting utilities.
 	# ---

 	def process_text_group(self):
@ -155,6 +179,64 @@ class Translator:

 		return text

+	def put_text(self, text):
+		""" Output some text. """
+
+		# If we want to ignore the content (because it is not used
+		# nor output), let the text fall into the void.
+
+		if self.cign > 0:
+			return
+
+		# Add to the text group, which will be processed when `flush_text()`
+		# is used.
+
+		self.text_group += text
+
+	def flush_text(self):
+		""" Flush the text that has been output. """
+
+		# First of all, check if the text group is empty or if we want to
+		# ignore it.
+
+		if not self.text_group or self.cign > 0:
+			return
+
+		# The last queue is composed of booleans (does the group contain
+		# something or not) and texts for content processing.
+		# We want to set all of the booleans to True until the first text
+		# group, to which we want to add the current text.
+		# If there is no content preprocessing and we have to output it,
+		# we want to start the tags first: `dat == None` will be our signal!
+		#
+		# Think about resetting `text_group` as its content has been used
+		# somewhere (unbuffer data).
+
+		for dat in self.queue:
+			if isinstance(dat.last, bool):
+				dat.last = True
+				continue
+			dat.last += self.text_group
+			break
+		else:
+			dat = None
+			text = self.process_text_group()
+
+		self.text_group = ""
+
+		# Start the tags that haven't been started, and stuff.
+
+		self.start_tags()
+
+		# If the content has to be written, we ought to.
+
+		if dat == None:
+			self.outp.write(text)
+
+	# ---
+	# Code outputting utilities.
+	# ---
+
 	def put_code(self, code):
 		""" Put some code. """

@ -195,10 +277,25 @@ class Translator:
 	def put_newline(self):
 		""" Put a newline. """

+		# As in `flush_text()`, the last queue is composed of booleans.
+		# We want to set all of the booleans to True until the first text
+		# group, to which we want to add the current text.
+		# If there is no content preprocessing and we have to output it,
+		# we want to start the tags first: `dat == None` will be our signal!
+
+		for dat in self.queue:
+			if isinstance(dat.last, bool):
+				dat.last = True
+				continue
+			dat.last += code
+			break
+		else:
+			dat = None
+
 		# If we ought to put a newline, that means that the paragraph content
 		# is starting and that we might have to put the start of paragraph.

-		self.start_block()
+		self.start_tags()

 		# The newline depends on the output type and the context, of course.

@ -211,59 +308,6 @@ class Translator:

 		self.put_code(newline)

-	def put_text(self, text):
-		""" Output some text. """
-
-		# If we want to ignore the content (because it is not used
-		# nor output), let the text fall into the void.
-
-		if self.cign > 0:
-			return
-
-		# Add to the text group, which will be processed when `flush_text()`
-		# is used.
-
-		self.text_group += text
-
-	def flush_text(self):
-		""" Flush the text that has been output. """
-
-		# First of all, check if the text group is empty or if we want to
-		# ignore it.
-
-		if not self.text_group or self.cign > 0:
-			return
-
-		# The last queue is composed of booleans (does the group contain
-		# something or not) and texts for content processing.
-		# We want to set all of the booleans to True until the first text
-		# group, to which we want to add the current text.
-		# If there is no content preprocessing and we have to output it,
-		# we want to start the tags first: `dat == None` will be our signal!
-
-		for dat in self.queue:
-			if isinstance(dat.last, bool):
-				dat.last = True
-				continue
-			dat.last += self.text_group
-			break
-		else:
-			dat = None
-
-		# Start the tags that haven't been started, and stuff.
-
-		self.start_tags()
-
-		# If the content has to be written, we ought to.
-
-		if dat == None:
-			self.outp.write(self.process_text_group())
-
-		# Don't forget to reset the `text_group`, as its content has been
-		# used somewhere (unbuffer data).
-
-		self.text_group = ""
-
 	# ---
 	# Tag queue management.
 	# ---
@ -363,8 +407,8 @@ class Translator:
 			# Output the beginning and the content. If there was no content,
 			# just put the content that we got earlier.

+			dat.started = True
 			if hasattr(tag, 'begin'):
-				dat.started = True
 				self.put_code(tag.begin())

 			if hasattr(tag, 'content'):
@ -402,7 +446,14 @@ class Translator:

 		# Don't forget to end the tag!

-		self.end_last_tag()
+		if dat.notempty and not dat.started:
+			pass
+		else:
+			if hasattr(tag, 'end'):
+				self.put_code(tag.end())
+
+			if dat.type == dat.BLOCK:
+				self.end_block()

 		# Disable raw mode if it was a raw tag (which means that it enabled it,
 		# as tags into raw tags cannot be processed).
@ -446,7 +497,8 @@ class Translator:

 		for dat in blocks + inlines:
 			dat.started = True
-			self.put_code(dat.tag.begin())
+			if hasattr(dat.tag, 'begin'):
+				self.put_code(dat.tag.begin())

 	def end_block(self):
 		""" End the current block. """
@ -462,10 +514,8 @@ class Translator:

 			if not isinstance(dat.last, bool) or dat.super: break

-			# Then put the tag in the appropriate queue, and set it as
-			# unstarted for safety reasons.
+			# Then put the tag in the appropriate queue.

-			dat.started = False
 			if dat.type == dat.BLOCK:
 				blocks.append(dat)
 			else:
@ -483,32 +533,6 @@ class Translator:
 			dat.started = False
 			dat.last = False

-	def end_last_tag(self):
-		""" End the latest tag entered in the queue. """
-
-		if not self.queue:
-			return
-
-		# If the tag hasn't been started, then it shouldn't be ended.
-
-		dat = self.queue[0]
-		if not dat.started:
-			return
-
-		# If it is not a block, then we should just end it like that.
-
-		if dat.type != dat.BLOCK:
-			dat.started = False
-			tag = dat.tag
-			if hasattr(tag, 'end'):
-				self.put_code(tag.end())
-			return
-
-		# If we have arrived there, then the tag to end is a block.
-		# We want to end the whole current block.
-
-		self.end_block()
-
 	# ---
 	# Main function.
 	# ---
@ -520,7 +544,7 @@ class Translator:
 		# Other blocks will supplant this by being further in the queue.

 		self.push_tag(_TagData(TextoutParagraphTag(None, None,
-			self.output_type), None, ''))
+			self.output_type, self.tweaks), None, ''))

 		# We want to get our elements out of the element stream (Lephe
 		# told me that the `TextoutStream` class was actually a lexer,
@ -621,7 +645,8 @@ class Translator:
 			# Get the initialized tag with the name and value.
 			# If the tag is unknown, output the full thing and just go on.

-			tag = get_tag(tagdata.name, tagdata.value, self.output_type)
+			tag = get_tag(tagdata.name, tagdata.value, self.output_type,
+				self.tweaks)
 			if not tag:
 				self.put_text(tagdata.full)
 				continue