textout/textoutpc/urls.py

#!/usr/bin/env python3
#******************************************************************************
# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
# This file is part of the textoutpc project, which is MIT-licensed.
#******************************************************************************
""" Autolinking (URL extraction from raw text) in HTML. """

import regex as _re

__all__ = ["htmlurls", "lightscripturls"]

# ---
# Autolinking regex.
# ---

def _sub_html(m):
	sp = m.group('sp')
	url = m.group('url')
	aft = ''

	# Hack for the last comma.
	if url[-1] == ',':
		url, aft = url[:-1], ','

	text = '{}<a href="{}">{}</a>{}' \
		.format(sp, url, url, aft)
	return text

def _sub_lightscript(m):
	sp = m.group('sp')
	url = m.group('url')
	aft = ''

	# Hack for the last comma.
	if url[-1] == ',':
		url, aft = url[:-1], ','

	url = url.replace('<', '%3C')
	url = url.replace('>', '%3E')
	text = '{}<{}>{}'.format(sp, url, aft)
	return text

# Pattern locating the URLs to autolink, written in verbose mode:
#
#  - `sp`: what comes right before the URL, i.e. a start of line,
#    a whitespace character or a punctuation character;
#  - `url`: the scheme (http, https or ftp), a colon, then the URL body;
#  - `ucore`: the URL body, made of characters other than brackets,
#    parentheses and whitespace, possibly followed by bracketed and
#    parenthesized parts which recurse into `ucore` and whose closing
#    delimiter is optional.
#
# The pattern is a raw string so that the regex escapes (`\s`, `\[`, ...)
# are not read as invalid Python string escapes. The newline after the
# opening quotes is ignored, as any other whitespace in verbose mode.

_reg = _re.compile(r"""
	(?P<sp>^|\s|[[:punct:]])
	(?P<url>(https?|ftp):
		(?P<ucore>[^\[\]\(\)\s]* (\[(?&ucore)\]?)* (\((?&ucore)\)?)*)*
	)
""", _re.VERBOSE | _re.M)

# ---
# Main functions.
# ---

def htmlurls(text):
	""" Autolink the URLs found in `text` for HTML output.

		Every match of the autolinking regex is rewritten using
		`_sub_html`; the resulting text is returned. """

	linked = _reg.sub(_sub_html, text)
	return linked

def lightscripturls(text):
	""" Autolink the URLs found in `text` for lightscript output.

		Every match of the autolinking regex is rewritten using
		`_sub_lightscript`; the resulting text is returned. """

	linked = _reg.sub(_sub_lightscript, text)
	return linked

# End of file.
All unit tests passed! 2018-01-19 22:44:43 +01:00			`#!/usr/bin/env python3`
Copyright notices, and save before trying to use a graphical debugger. 2018-02-19 19:50:26 +01:00			`#******************************************************************************`
			`# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>`
			`# This file is part of the textoutpc project, which is MIT-licensed.`
			`#******************************************************************************`
All unit tests passed! 2018-01-19 22:44:43 +01:00			`""" Autolinking (URL extraction from raw text) in HTML. """`

			`import regex as _re`

Doesn't work yet with block logic, still working on it, just saving it. 2018-02-11 12:01:32 +01:00			`__all__ = ["htmlurls", "lightscripturls"]`
All unit tests passed! 2018-01-19 22:44:43 +01:00
			`# ---`
			`# Autolinking regex.`
			`# ---`

Doesn't work yet with block logic, still working on it, just saving it. 2018-02-11 12:01:32 +01:00			`def _sub_html(m):`
All unit tests passed! 2018-01-19 22:44:43 +01:00			`sp = m.group('sp')`
			`url = m.group('url')`
			`aft = ''`

			`# Hack for the last comma.`
			`if url[-1] == ',':`
			`url, aft = url[:-1], ','`

Prepared packaging & stuff 2018-02-11 21:31:39 +01:00			`text = '{}<a href="{}">{}</a>{}' \`
All unit tests passed! 2018-01-19 22:44:43 +01:00			`.format(sp, url, url, aft)`
			`return text`

Doesn't work yet with block logic, still working on it, just saving it. 2018-02-11 12:01:32 +01:00			`def _sub_lightscript(m):`
			`sp = m.group('sp')`
			`url = m.group('url')`
			`aft = ''`

			`# Hack for the last comma.`
			`if url[-1] == ',':`
			`url, aft = url[:-1], ','`

			`url = url.replace('<', '%3C')`
			`url = url.replace('>', '%3E')`
			`text = '{}<{}>{}'.format(sp, url, aft)`
			`return text`

All unit tests passed! 2018-01-19 22:44:43 +01:00			`_reg = _re.compile("""\`
			`(?P<sp>^\|\s\|[[:punct:]])`
			`(?P<url>(https?\|ftp):`
			`(?P<ucore>[^\[\]\(\)\s]* (\[(?&ucore)\]?)* (\((?&ucore)\)?))`
			`)`
			`""", _re.VERBOSE \| _re.M)`

			`# ---`
Doesn't work yet with block logic, still working on it, just saving it. 2018-02-11 12:01:32 +01:00			`# Main functions.`
All unit tests passed! 2018-01-19 22:44:43 +01:00			`# ---`

			`def htmlurls(text):`
Doesn't work yet with block logic, still working on it, just saving it. 2018-02-11 12:01:32 +01:00			`return _reg.sub(_sub_html, text)`

			`def lightscripturls(text):`
			`return _reg.sub(_sub_lightscript, text)`
All unit tests passed! 2018-01-19 22:44:43 +01:00
			`# End of file.`