move messageToNode into new IRCFormatter class

Preparing to add more complexity to handle formats.
This commit is contained in:
Lephenixnoir 2023-08-09 14:38:00 +02:00
parent 3a3527c6f9
commit 358a5f107e
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
1 changed files with 132 additions and 106 deletions

View File

@ -345,110 +345,6 @@ let shoutbox = new function() {
shoutbox.selectChannel(channel);
}
/* Convert the raw text `message` into DOM nodes appended to `element`.

   The message is scanned left to right. Three kinds of segments are turned
   into elements; everything between them becomes plain text nodes:
   - raw URLs (http://, https://, ftp://, magnet:)  -> <a>
   - [label](url) links                             -> <a>
   - `inline code`                                  -> <code>

   Parameters:
     message  string to format
     element  DOM element receiving the generated children
   Returns nothing; the result is the children appended to `element`. */
this.messageToNode = function(message, element) {
  /* Only the protocol is matched by the regex; fURL() reads the rest. The `d`
     flag is required because the handlers use match.indices. */
  const rURL = /(https?:\/\/|ftp:\/\/|magnet:)/d;
  /* Handlers receive a match object and return the number of characters of
     the message they consumed, counted from the start of the match. */
  const fURL = (match) => {
    /* We've found the protocol, now read the arguments until whitespace or
       unbalanced closing parenthesis */
    let i = match.indices[0][1];
    let par_depth = 0;
    while(i < match.input.length && !/\s/.test(match.input[i])) {
      if(match.input[i] === "(")
        par_depth++;
      else if(match.input[i] === ")")
        par_depth--;
      if(par_depth < 0)
        break;
      i++;
    }
    /* Don't count the last character if it's a quote or comma */
    if(i > 0 && /[",]/.test(match.input[i-1]))
      i--;
    const url = match.input.substring(match.indices[0][0], i);
    const a = document.createElement("a");
    a.href = url;
    a.target = "_blank";
    /* Never hand the opened page a window.opener reference to this one. */
    a.rel = "noopener";
    a.appendChild(document.createTextNode(url));
    element.appendChild(a);
    return url.length;
  };

  /* Match up to the start of the link */
  const rLink = /\[([^\]]+)\]\((https?:\/\/|ftp:\/\/|magnet:)/d;
  const fLink = (match) => {
    /* Read link while keeping balance parentheses. Compared to raw URLs, we
       allow spaces, and also keep quotes/commas as last characters since
       there is a reasonable delimiter */
    let i = match.indices[0][1];
    let par_depth = 0;
    while(i < match.input.length) {
      if(match.input[i] === "(")
        par_depth++;
      else if(match.input[i] === ")")
        par_depth--;
      if(par_depth < 0)
        break;
      i++;
    }
    /* Group 2 is the protocol, i.e. the first character of the URL. */
    const url = match.input.substring(match.indices[2][0], i);
    const a = document.createElement("a");
    a.href = url;
    a.target = "_blank";
    /* Never hand the opened page a window.opener reference to this one. */
    a.rel = "noopener";
    a.appendChild(document.createTextNode(match[1]));
    element.appendChild(a);
    /* Also consume the closing parenthesis, if the scan stopped on one rather
       than at the end of the input. */
    const closing = (i < match.input.length) ? 1 : 0;
    return i + closing - match.indices[0][0];
  };

  const rInlineCode = /`([^`]+)`/;
  const fInlineCode = (match) => {
    const code = document.createElement("code");
    code.appendChild(document.createTextNode(match[1]));
    element.appendChild(code);
    return match[0].length;
  };

  /* List of matchers: regex, handling function, match object, index.
     The index is the absolute position of the cached match in `message`, or
     -1 when there is no usable cached match. */
  const matchers = [
    [rURL, fURL, null, -1],
    [rLink, fLink, null, -1],
    [rInlineCode, fInlineCode, null, -1],
  ];

  /* Repeatedly find the next segment to convert. */
  let i = 0;
  while(i < message.length) {
    let next = message.length;
    let next_matcher = null;

    /* Update the next matches for all regexes and find the one that matches
       the earliest. A cached match is stale once the cursor has moved past
       its start; match objects stay relative to the substring they were
       computed on, which is why handlers only use match.input/indices. */
    for(const m of matchers) {
      if(m[3] < i) {
        m[0].lastIndex = 0;
        m[2] = m[0].exec(message.substring(i));
        m[3] = (m[2] !== null) ? i + m[2].index : -1;
      }
      /* Strict comparison: on a tie the matcher listed first wins. */
      if(m[3] >= 0 && m[3] < next) {
        next = m[3];
        next_matcher = m;
      }
    }

    /* Find the closest one. If it's not at offset 0, do a text node. */
    if(next > i) {
      const sub = message.substring(i, next);
      element.appendChild(document.createTextNode(sub));
      i = next;
    }
    if(next_matcher !== null) {
      i += next_matcher[1](next_matcher[2]);
      /* The match has been consumed; force a fresh search next round. */
      next_matcher[2] = null;
      next_matcher[3] = -1;
    }
  }
};
this.addNewMessage = function(channel, date, author, message) {
const view = this.getChannelView(channel);
if(view === undefined)
@ -467,7 +363,8 @@ let shoutbox = new function() {
messageElement.appendChild(authorElement);
let messageContentElement = document.createElement("p");
messageContentElement.classList.add("message-content");
this.messageToNode(message, messageContentElement);
const fmt = new IRCFormatter(message, messageContentElement);
fmt.messageToNode();
messageElement.appendChild(messageContentElement);
let dateElement = document.createElement("div");
dateElement.classList.add("message-date");
@ -518,8 +415,137 @@ let shoutbox = new function() {
}
};
/* IRC-to-HTML formatting.
This is less trivial than it sounds because of the stateful nature of IRC's
formatting system. Specific escapes enable, disable, toggle, reset formats,
not necessarily in any well-parenthesized order.
The bulk of this formatter consists of:
- Parsing interesting stuff (URLs, [links](...), etc.) with basic regular
expressions (goal: maximum flexibility)
- An internal node/group representation used to resolve stateful formats */
/* Formats one chat message into DOM nodes.

   Usage: new IRCFormatter(message, element).messageToNode();
   The generated nodes are appended as children of `element`. */
class IRCFormatter {
  /* The internal representation is a tree of Nodes (text, links, etc) and
     Format changes, arranged by concatenation in Groups (sequences). Nodes can
     contain other groups for nesting. The format application algorithm is a
     dual top-down + bottom-up pass to collect formats and find the series of
     tags needed to achieve the correct format.
     NOTE(review): the code below does not build such a tree; it appends DOM
     nodes to the target element directly. */

  /* Raw URL: only the protocol is matched, fURL() reads the rest. The `d`
     flag is required because the handlers use match.indices. */
  static rURL = /(https?:\/\/|ftp:\/\/|magnet:)/d;
  /* [label](url): matches up to and including the protocol of the URL. */
  static rLink = /\[([^\]]+)\]\((https?:\/\/|ftp:\/\/|magnet:)/d;
  /* `inline code` delimited by single backticks. */
  static rInlineCode = /`([^`]+)`/;

  /* message: string to format; element: DOM element receiving the nodes. */
  constructor(message, element) {
    this.message = message;
    this.element = element;
  }

  /* Scan `text` from index `start` and return the index at which the URL
     ends: the first closing parenthesis that has no matching opening one,
     the first whitespace character when `stopAtWhitespace` is set, or the
     end of the text. */
  static #scanBalanced(text, start, stopAtWhitespace) {
    let depth = 0;
    let i = start;
    while(i < text.length) {
      const c = text[i];
      if(stopAtWhitespace && /\s/.test(c))
        break;
      if(c === "(")
        depth++;
      else if(c === ")")
        depth--;
      if(depth < 0)
        break;
      i++;
    }
    return i;
  }

  /* Append to `element` an <a> pointing to `url`, showing `label`, that opens
     in a new browsing context. */
  static #appendAnchor(element, url, label) {
    const a = document.createElement("a");
    a.href = url;
    a.target = "_blank";
    /* Never hand the opened page a window.opener reference to this one. */
    a.rel = "noopener";
    a.appendChild(document.createTextNode(label));
    element.appendChild(a);
  }

  /* Handler for rURL. `match` is the exec() result; the link is appended to
     `element`. Returns the number of characters consumed from the start of
     the match. */
  fURL(match, element) {
    /* We've found the protocol, now read the arguments until whitespace or
       unbalanced closing parenthesis */
    const [start, protocolEnd] = match.indices[0];
    let end = IRCFormatter.#scanBalanced(match.input, protocolEnd, true);
    /* Don't count the last character if it's a quote or comma */
    if(end > 0 && /[",]/.test(match.input[end - 1]))
      end--;
    const url = match.input.substring(start, end);
    IRCFormatter.#appendAnchor(element, url, url);
    return url.length;
  }

  /* Handler for rLink. `match` is the exec() result; the link is appended to
     `element` with the bracketed text as its label. Returns the number of
     characters consumed from the start of the match. */
  fLink(match, element) {
    /* Read link while keeping balance parentheses. Compared to raw URLs, we
       allow spaces, and also keep quotes/commas as last characters since
       there is a reasonable delimiter */
    const end = IRCFormatter.#scanBalanced(
      match.input, match.indices[0][1], false);
    /* Group 2 is the protocol, i.e. the first character of the URL. */
    const url = match.input.substring(match.indices[2][0], end);
    IRCFormatter.#appendAnchor(element, url, match[1]);
    /* Also consume the closing parenthesis, if the scan stopped on one rather
       than at the end of the input. */
    const closing = (end < match.input.length) ? 1 : 0;
    return end + closing - match.indices[0][0];
  }

  /* Handler for rInlineCode. Appends a <code> element holding the text
     between the backticks; returns the length of the whole match. */
  fInlineCode(match, element) {
    const code = document.createElement("code");
    code.appendChild(document.createTextNode(match[1]));
    element.appendChild(code);
    return match[0].length;
  }

  /* Convert this.message into nodes appended to this.element. Segments
     recognized by one of the matchers become elements; the text between them
     becomes plain text nodes. Returns nothing. */
  messageToNode() {
    /* `index` is the absolute position of the cached `match` in the message,
       or -1 when there is no usable cached match. */
    const matchers = [
      { regex: IRCFormatter.rURL, handler: this.fURL },
      { regex: IRCFormatter.rLink, handler: this.fLink },
      { regex: IRCFormatter.rInlineCode, handler: this.fInlineCode },
    ].map((m) => ({ ...m, match: null, index: -1 }));

    /* Repeatedly find the next segment to convert. */
    const message = this.message;
    let i = 0;
    while(i < message.length) {
      let next = message.length;
      let nextMatcher = null;

      /* Update the next matches for all regexes and find the one that matches
         the earliest. A cached match is stale once the cursor has moved past
         its start; match objects stay relative to the substring they were
         computed on, which is why handlers only use match.input/indices. */
      for(const m of matchers) {
        if(m.index < i) {
          m.regex.lastIndex = 0;
          m.match = m.regex.exec(message.substring(i));
          m.index = (m.match !== null) ? i + m.match.index : -1;
        }
        /* Strict comparison: on a tie the matcher listed first wins. */
        if(m.index >= 0 && m.index < next) {
          next = m.index;
          nextMatcher = m;
        }
      }

      /* Find the closest one. If it's not at offset 0, do a text node. */
      if(next > i) {
        const sub = message.substring(i, next);
        this.element.appendChild(document.createTextNode(sub));
        i = next;
      }
      if(nextMatcher !== null) {
        i += nextMatcher.handler.call(this, nextMatcher.match, this.element);
        /* The match has been consumed; force a fresh search next round. */
        nextMatcher.match = null;
        nextMatcher.index = -1;
      }
    }
  }
}
/* We initialize the shoutbox once the SharedChat has finished its async init
*and* the DOMContentLoaded even has been fired. */
*and* the DOMContentLoaded event has been fired. */
let sc = new SharedChat();
const sc_init_promise = sc.init();