All unit tests passed!

2018-01-19 22:44:43 +01:00 · 2018-01-19 22:44:43 +01:00 · d4d1b34e03
parent b4c9dc19be
commit d4d1b34e03
10 changed files with 158 additions and 97 deletions
--- a/README.md
+++ b/README.md
@ -71,5 +71,17 @@ _Planète Casio_ admin, who also made the native Python module.
 	
 	[strike]Usage of the quick tag ending[/]

+## What is left to do
+
+- Finish correcting;
+- Manage paragraph and inline tags differently;
+- Manage lightscript (or even markdown?) as output languages;
+- Check where the errors are to display them to the user:
+  
+  - Count character offset, line number and column number in the lexer;
+  - Produce readable exceptions;
+  - Make a clean interface to transmit them;
+- Look for security flaws (we don't want stored XSS here!).
+
 [pc]: https://www.planet-casio.com/Fr/
 [ls]: https://bitbucket.org/Lephenixnoir/lightscript
--- a/setup.py
+++ b/setup.py
@ -15,6 +15,8 @@ setup(name='textoutpc',
 	packages=['textoutpc'],
 	scripts=['textoutpc'],

+	install_requires=['regex'],
+
 	classifiers = [
 		'Development Status :: 2 - Pre-Alpha',
 		'Topic :: Software Development :: Interpreters'
--- a/test/test_html.py
+++ b/test/test_html.py
@ -22,8 +22,10 @@ __test_cases = {
 	'[rot13]obawbhe[/rot13]': 'bonjour',

 	# Links.
-	'[url=http://hey.org/lol[]>"a]': '<a href="http://hey.org/lol[]>%22a" ' \
-		'target="_blank" rel="noopener">http://hey.org/lol[]&gt;&quot;a</a>',
+	'[url=http://hey.org/lol[]>"a]': '<a href="http://hey.org/lol[]&gt;' \
+		'&quot;a" target="_blank" rel="noopener">' \
+		'<a href="http://hey.org/lol[]&gt;&quot;a" target="_blank"' \
+		' rel="noopener">http://hey.org/lol[]&gt;&quot;a</a></a>',
 	'[url]javascript:alert(1)[/url]': '[url]javascript:alert(1)[/url]',
 	'(http://www.example.org/some-[damn-url]-(youknow))': \
 		'(<a href="http://www.example.org/some-[damn-url]-(youknow)" ' \
@ -77,9 +79,10 @@ __test_cases = {
 		'frameborder="0" allowfullscreen></iframe>',
 	'[video]https://www.youtube.com/watch?v=<script>alert(1)</script>' \
 	'[/video]': \
-		'<a href="https://www.youtube.com/watch?v=<script>alert(1)</script>"' \
-		' target="_blank" rel="noopener">https://www.youtube.com/watch?v=' \
-		'&lt;script&gt;alert(1)&lt;/script&gt;</a>',
+		'<a href="https://www.youtube.com/watch?v=&lt;script&gt;alert(1)' \
+		'&lt;/script&gt;" target="_blank" rel="noopener">' \
+		'https://www.youtube.com/watch?v=&lt;script&gt;alert(1)' \
+		'&lt;/script&gt;</a>',

 	# Quotes.
 	'[quote=Test 1 :)]lel[/quote]': \
--- a/textoutpc/Tags/Link.py
+++ b/textoutpc/Tags/Link.py
@ -52,7 +52,7 @@ class TextoutLinkTag(TextoutRawInlineTag):

 	def begin_html(self):
 		return '<a href="{}" target="_blank" rel="noopener">' \
-			.format(self.url.replace('"', '%22'))
+			.format(_htmlescape(self.url))

 	def end_html(self):
 		return '</a>'
--- a/textoutpc/Tags/Video.py
+++ b/textoutpc/Tags/Video.py
@ -85,7 +85,8 @@ class TextoutVideoTag(TextoutRawBlockTag):
 				'frameborder="0" webkitAllowFullScreen allowFullScreen>' \
 				'</iframe>'.format(self.code, self.w, self.h)
 		else:
+			url = _htmlescape(self.url)
 			return '<a href="{}" target="_blank" rel="noopener">{}</a>' \
-				.format(self.url.replace('"', '%22'), _htmlescape(self.url))
+				.format(url, url)

 # End of file.
--- a/textoutpc/init.py
+++ b/textoutpc/init.py
@ -14,7 +14,7 @@ def tohtml(message):
 	""" Converts textout BBcode to HTML.
 		Receives a string, returns a string. """

-	return _Translator(io.StringIO(message), io.StringIO()) \
+	return _Translator(io.StringIO(message), io.StringIO(), 'html') \
 		.process().getvalue()

 # End of file.
--- a/textoutpc/smileys.py
+++ b/textoutpc/smileys.py
@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Smiley conversion.
+	Just convert them™.
+"""
+
+import regex as _re
+
+__all__ = ["htmlsmileys"]
+
+# ---
+# List of them.
+# ---
+
+# Base URL path prepended to every file name of `_Smileys` when rendering.
+_Smileys_prefix = "/images/smileys/"
+
+# Smiley text -> image file name, relative to `_Smileys_prefix`.
+# Values must NOT repeat the prefix: it is added at substitution time.
+_Smileys = {
+	'>:)':			'twisted.gif',
+	'>:(':			'evil.gif',
+	':)':			'smile.gif',
+	';)':			'wink.gif',
+	':(':			'sad.gif',
+	':D':			'grin.gif',
+	':p':			'hehe.gif',
+	'8-)':			'cool2.gif',
+	':@':			'mad.gif',
+	'0_0':			'eek.gif',
+	':E':			'mrgreen.gif',
+	':O':			'shocked.gif',
+	':s':			'confused2.gif',
+	'^^':			'eyebrows.gif',
+	":'(":			'cry.gif',
+#	':-°':			('whistle.gif', 'height: 15px;'),

	# Name-based smileys.

+	':lol:':		'lol.gif',
+	':oops:':		'confused2.gif',
+	':grr:':		'evil.gif',
+	':sry:':		'redface.gif',
+	':mmm:':		'rolleyes.gif',
+	':waza:':		'waza.gif',
+#	':whistle:':	('whistle.gif', 'height: 15px;'),
+	':here:':		'pointer.gif',
+	':bow:':		'bow.gif',
+	':cool:':		'cool.gif',
+	':good:':		'welldone.gif',
+	':love:':		'love.gif',
+	':aie:':		'banghead2.gif',
+	':cry:':		'cry.gif',
+	':facepalm:':	'facepalm.gif',
+	':argh:':		'insults.gif',
+	':?:':			'what.gif',
+	':!:':			'excl.gif',
+	':arrow:':		'here.gif',
+	':grin:':		'grin.gif',
+}
+
+def _Smiley_sub(m):
+	""" Substitution callback for `_Smiley_re`.
+		Keeps the separators captured before and after the smiley
+		(groups 1 and 3) and replaces the smiley itself (group 2) with
+		an `<img>` tag whose source is `_Smileys_prefix` followed by
+		the matching `_Smileys` value. """
+
+	return m.group(1) + '<img src="{}{}">'.format(_Smileys_prefix,
+		_Smileys[m.group(2)]) + m.group(3)
+
+# A smiley is only recognized when delimited on both sides by whitespace
+# or by the beginning/end of the text. The keys are escaped so that
+# characters such as `(`, `)`, `?` or `^` are matched literally.
+_Smiley_re = _re.compile('(^|\\s)(' + '|'.join(map(_re.escape,
+	_Smileys.keys())) + ')(\\s|$)')
+
+# ---
+# Functions.
+# ---
+
+def htmlsmileys(text):
+	""" Replace the textual smileys found in `text` by HTML images.
+		Receives a string, returns a string. """
+
+	# The substitution is run twice on purpose: a match consumes the
+	# whitespace following the smiley, so a smiley placed right after
+	# another one has no leading separator left during the first pass
+	# and is only picked up by the second one.
+	text = _Smiley_re.sub(_Smiley_sub, text)
+	text = _Smiley_re.sub(_Smiley_sub, text)
+	return text
+
+# End of file.
--- a/textoutpc/stream.py
+++ b/textoutpc/stream.py
--- a/textoutpc/translate.py
+++ b/textoutpc/translate.py
@ -7,87 +7,13 @@
 import regex as _re
 from io import StringIO as _sio
 from html import escape as _htmlescape
-from .Stream import *
+from .stream import *
 from .Tags import TextoutRawTag, TextoutInlineTag, TextoutBlockTag, get_tag
+from .smileys import htmlsmileys as _htmlsmileys
+from .urls import htmlurls as _htmlurls

 __all__ = ["Translator"]

-# ---
-# Naked URLs regex.
-# ---
-
-def _NakedURLs_sub(m):
-	sp = m.groups('sp')
-	url = m.groups('url')
-	aft = ''
-
-	# Hack for the last comma.
-	if url[-1] == ',':
-		url, aft = url[:-1], ','
-
-	return '{}<a href="{}" target="_blank" rel="noopener">{}</a>{}' \
-		.format(sp, url, url, aft)
-
-_NakedURLs_re = _re.compile(r"""\
-	(?P<sp>^|\s|[[:punct:]])
-	(?P<url>(https?|ftp):
-		(?P<ucore>[^\[\]\(\)\s]* (\[(?&ucore)\]?)* (\((?&ucore)\)?)*)*
-	)
-""", _re.VERBOSE | _re.M)
-
-# ---
-# Smileys.
-# ---
-
-_Smileys = {
-	'>:)':			'/images/smileys/twisted.gif',
-	'>:(':			'/images/smileys/evil.gif',
-	':)':			'/images/smileys/smile.gif',
-	';)':			'/images/smileys/wink.gif',
-	':(':			'/images/smileys/sad.gif',
-	':D':			'/images/smileys/grin.gif',
-	':p': 			'/images/smileys/hehe.gif',
-	'8-)':			'/images/smileys/cool2.gif',
-	':@':			'/images/smileys/mad.gif',
-	'0_0':			'/images/smileys/eek.gif',
-	':E':			'/images/smileys/mrgreen.gif',
-	':O':			'/images/smileys/shocked.gif',
-	':s':			'/images/smileys/confused2.gif',
-	'^^':			'/images/smileys/eyebrows.gif',
-	":'(":			'/images/smileys/cry.gif',
-#	':-°':			('/images/smileys/whistle.gif', 'height: 15px;'),
-
-	# Name-based smileys.
-
-	':lol:':		'/images/smileys/lol.gif',
-	':oops:':		'/images/smileys/confused2.gif',
-	':grr:':		'/images/smileys/evil.gif',
-	':sry:':		'/images/smileys/redface.gif',
-	':mmm:':		'/images/smileys/rolleyes.gif',
-	':waza:':		'/images/smileys/waza.gif',
-#	':whistle:':	('/images/smileys/whistle.gif', 'height: 15px;'),
-	':here:':		'/images/smileys/pointer.gif',
-	':bow:':		'/images/smileys/bow.gif',
-	':cool:':		'/images/smileys/cool.gif',
-	':good:':		'/images/smileys/welldone.gif',
-	':love:':		'/images/smileys/love.gif',
-	':aie:':		'/images/smileys/banghead2.gif',
-	':cry:':		'/images/smileys/cry.gif',
-	':facepalm:':	'/images/smileys/facepalm.gif',
-	':argh:':		'/images/smileys/insults.gif',
-	':?:':			'/images/smileys/what.gif',
-	':!:':			'/images/smileys/excl.gif',
-	':arrow:':		'/images/smileys/here.gif',
-	':grin:':		'/images/smileys/grin.gif',
-}
-
-def _Smiley_sub(m):
-	return m.group(1) + '<img src="{}">'.format(_Smileys[m.group(2)]) \
-		+ m.group(3)
-
-_Smiley_re = _re.compile('(^|\\s)(' + '|'.join(map(_re.escape,
-	_Smileys.keys())) + ')(\\s|$)')
-
 # ---
 # Translator main class.
 # ---
@ -99,9 +25,13 @@ class Translator:
 		You can even chain calls as the `process()` method returns
 		the output stream object. """

-	def __init__(self, inp, outp):
+	def __init__(self, inp, outp, output_type):
 		""" Initializer. """

+		if output_type != 'html':
+			raise Exception("Invalid output type")
+		self.output_type = output_type
+
 		self.inp = inp
 		self.outp = outp

@ -152,17 +82,11 @@ class Translator:

 		# In all cases, we want to escape for HTML things, so that the
 		# user doesn't insert raw HTML tags (which would be a security flaw!).
-		text = _htmlescape(self.text_group)
+		if self.output_type == 'html':
+			text = _htmlescape(self.text_group)

-		if not self.raw_mode:
-			# Naked URL conversion.
-
-			text = _NakedURLs_re.sub(_NakedURLs_sub, text)
-
-			# Smiley conversion. Basically sub and replace.
-
-			text = _Smiley_re.sub(_Smiley_sub, text)
-			text = _Smiley_re.sub(_Smiley_sub, text)
+		if not self.raw_mode and self.output_type == 'html':
+			text = _htmlsmileys(_htmlurls(text))

 		return text

@ -401,7 +325,7 @@ class Translator:
 			# If the tag is unknown, output the full thing and just
 			# go on.

-			tag = get_tag(tagdata.name, tagdata.value, 'html')
+			tag = get_tag(tagdata.name, tagdata.value, self.output_type)
 			if not tag:
 				self.put_text(tagdata.full)
 				continue
--- a/textoutpc/urls.py
+++ b/textoutpc/urls.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Autolinking (URL extraction from raw text) in HTML. """
+
+import regex as _re
+from html import escape as _htmlescape
+
+__all__ = ["htmlurls"]
+
+# ---
+# Autolinking regex.
+# FIXME: they don't work yet by the look of it.
+# ---
+
+def _sub(m):
+	""" Substitution callback for `_reg`.
+		Wraps the matched URL in an `<a>` element, keeping the leading
+		separator (`sp` group) in front of it. The URL is interpolated
+		as-is, both as the `href` value and as the link text: no
+		escaping is done here.
+		NOTE(review): this presumably relies on the caller passing
+		already HTML-escaped text; to be confirmed against callers. """
+
+	sp = m.group('sp')
+	url = m.group('url')
+	aft = ''
+
+	# Hack for the last comma: a trailing ',' is taken out of the URL
+	# and emitted right after the closing `</a>` instead.
+	if url[-1] == ',':
+		url, aft = url[:-1], ','
+
+	text = '{}<a href="{}" target="_blank" rel="noopener">{}</a>{}' \
+		.format(sp, url, url, aft)
+	return text
+
+# Naked URL pattern (uses `regex`-only features: POSIX classes and
+# recursive group calls, so brackets and parentheses inside the URL have
+# to be balanced to be part of it).
+# This must be a raw string: `\s`, `\[`, `\(`... are regex escapes, not
+# valid Python string escapes. The leading newline is harmless, as
+# whitespace is ignored in VERBOSE mode; a trailing backslash after the
+# opening quotes must NOT be used in a raw string, since it would turn
+# into an escaped (hence literal) newline at the start of the pattern.
+_reg = _re.compile(r"""
+	(?P<sp>^|\s|[[:punct:]])
+	(?P<url>(https?|ftp):
+		(?P<ucore>[^\[\]\(\)\s]* (\[(?&ucore)\]?)* (\((?&ucore)\)?)*)*
+	)
+""", _re.VERBOSE | _re.M)
+
+# ---
+# Main function.
+# ---
+
+def htmlurls(text):
+	""" Turn the naked URLs found in `text` into HTML links.
+		Receives a string, returns a string. """
+
+	return _reg.sub(_sub, text)
+
+# End of file.