2
0
Fork 0
textout/textoutpc/urls.py

61 lines
1.3 KiB
Python
Raw Normal View History

2018-01-19 22:44:43 +01:00
#!/usr/bin/env python3
#******************************************************************************
# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
# This file is part of the textoutpc project, which is MIT-licensed.
#******************************************************************************
2018-01-19 22:44:43 +01:00
""" Autolinking (URL extraction from raw text) for HTML and lightscript output. """
import regex as _re
__all__ = ["htmlurls", "lightscripturls"]
2018-01-19 22:44:43 +01:00
# ---
# Autolinking regex.
# ---
def _sub_html(m):
    """ Build the HTML replacement for one autolinked URL match.

    `m` is a match object exposing the named groups `sp` (what was
    matched right before the URL) and `url` (the URL itself).

    Returns `sp`, followed by an `<a>` element whose `href` and text are
    both the URL, followed by the trailing comma if one was split off.
    """

    sp = m.group('sp')
    url = m.group('url')
    aft = ''

    # Hack for the last comma: a comma ending the match is moved out of
    # the URL so that it lands after the link instead of inside of it.
    if url.endswith(','):
        url, aft = url[:-1], ','

    # NOTE(review): the URL is inserted as-is, both in the attribute and
    # in the element text; presumably the caller has already HTML-escaped
    # the input text -- confirm.
    return '{}<a href="{}">{}</a>{}'.format(sp, url, url, aft)
def _sub_lightscript(m):
    """ Build the lightscript replacement for one autolinked URL match.

    `m` is a match object exposing the named groups `sp` (what was
    matched right before the URL) and `url` (the URL itself).

    Returns `sp`, followed by the URL wrapped in angle brackets,
    followed by the trailing comma if one was split off.
    """

    prefix = m.group('sp')
    link = m.group('url')

    # Hack for the last comma: move it out of the URL so that it ends up
    # after the closing angle bracket.
    trailing = ''
    if link[-1] == ',':
        link = link[:-1]
        trailing = ','

    # Angle brackets delimit the link in the output, so the ones found
    # inside of the URL are percent-encoded.
    for char, code in (('<', '%3C'), ('>', '%3E')):
        link = link.replace(char, code)

    return '{}<{}>{}'.format(prefix, link, trailing)
2018-01-19 22:44:43 +01:00
# Matches one URL to autolink, along with what comes right before it:
#  - `sp`: a start of line, one whitespace character or one punctuation
#    character preceding the URL;
#  - `url`: the `http:`, `https:` or `ftp:` scheme followed by the core;
#  - `ucore`: characters other than whitespace, square brackets and
#    parentheses, then bracketed and parenthesised groups which recurse
#    into `ucore` through `(?&ucore)`; the closing bracket or
#    parenthesis of such a group is optional.
#
# The pattern is a raw string so that `\s`, `\[`, ... reach the regex
# engine without going through (invalid) Python string escapes. The
# pattern starts with a newline instead of a `\` line continuation, which
# would be kept as an escaped newline in a raw string; that leading
# newline is ignored in verbose mode.
_reg = _re.compile(r"""
(?P<sp>^|\s|[[:punct:]])
(?P<url>(https?|ftp):
(?P<ucore>[^\[\]\(\)\s]* (\[(?&ucore)\]?)* (\((?&ucore)\)?)*)*
)
""", _re.VERBOSE | _re.M)
# ---
# Main functions.
# ---
def htmlurls(text):
    """ Autolink the URLs found in `text` for HTML output.

    Every match of the module's URL regex is rewritten by `_sub_html`;
    the resulting text is returned.
    """

    linked = _reg.sub(_sub_html, text)
    return linked
def lightscripturls(text):
    """ Autolink the URLs found in `text` for lightscript output.

    Every match of the module's URL regex is rewritten by
    `_sub_lightscript`; the resulting text is returned.
    """

    linked = _reg.sub(_sub_lightscript, text)
    return linked
2018-01-19 22:44:43 +01:00
# End of file.