move messageToNode into new IRCFormatter class

Preparing to add more complexity to handle formats.
2023-08-09 14:38:00 +02:00 · 2023-08-09 14:38:00 +02:00 · 358a5f107e
parent 3a3527c6f9
commit 358a5f107e
1 changed files with 132 additions and 106 deletions
--- a/v5shoutbox.js
+++ b/v5shoutbox.js
@ -345,110 +345,6 @@ let shoutbox = new function() {
      shoutbox.selectChannel(channel);
  }

-  this.messageToNode = function(message, element) {
-    const rURL = /(https?:\/\/|ftp:\/\/|magnet:)/d;
-    const fURL = (match) => {
-      /* We've found the protocol, now read the arguments until whitespace or
-         unbalanced closing parenthesis */
-      let i = match.indices[0][1];
-      let par_depth = 0;
-
-      while(i < match.input.length && !/\s/.test(match.input[i])) {
-        par_depth += (match.input[i] == "(");
-        par_depth -= (match.input[i] == ")");
-        if(par_depth < 0)
-          break;
-        i++;
-      }
-
-      /* Don't count the last character if it's a quote or comma */
-      if(i > 0 && /[",]/.test(match.input[i-1]))
-        i--;
-
-      const url = match.input.substring(match.indices[0][0], i);
-      const a = document.createElement("a");
-      a.href = url;
-      a.target = "_blank";
-      a.appendChild(document.createTextNode(url));
-      element.appendChild(a);
-      return url.length;
-    };
-
-    /* Match up to the start of the link */
-    const rLink = /\[([^\]]+)\]\((https?:\/\/|ftp:\/\/|magnet:)/d;
-    const fLink = (match) => {
-      /* Read link while keeping balance parentheses. Compared to raw URLs, we
-         allow spaces, and also keep quotes/commas as last characters since
-         there is a reasonable delimiter */
-      let i = match.indices[0][1];
-      let par_depth = 0;
-
-      while(i < match.input.length) {
-        par_depth += (match.input[i] == "(");
-        par_depth -= (match.input[i] == ")");
-        if(par_depth < 0)
-          break;
-        i++;
-      }
-
-      const url = match.input.substring(match.indices[2][0], i);
-      const a = document.createElement("a");
-      a.href = url;
-      a.target = "_blank";
-      a.appendChild(document.createTextNode(match[1]));
-      element.appendChild(a);
-      return i + (i < match.input.length) - match.indices[0][0];
-    };
-
-    const rInlineCode = /`([^`]+)`/;
-    const fInlineCode = (match) => {
-      const code = document.createElement("code");
-      code.appendChild(document.createTextNode(match[1]));
-      element.appendChild(code);
-      return match[0].length;
-    }
-
-    /* List of matchers: regex, handling function, match object, index. */
-    let matchers = [
-      [rURL,        fURL,        null, -1],
-      [rLink,       fLink,       null, -1],
-      [rInlineCode, fInlineCode, null, -1],
-    ];
-
-    /* Repeatedly find the next segment to convert. */
-    let i = 0;
-    while(i < message.length) {
-      let next = message.length;
-      let next_matcher = null;
-
-      /* Update the next matches for all regexes and find the one that matches
-         the earliest. */
-      for(const m of matchers) {
-        if(m[3] < i) {
-          m[0].lastIndex = 0;
-          m[2] = m[0].exec(message.substring(i));
-          m[3] = (m[2] !== null) ? i + m[2].index : -1;
-        }
-        if(m[3] >= 0 && m[3] < next) {
-          next = m[3];
-          next_matcher = m;
-        }
-      }
-
-      /* Find the closest one. If it's not at offset 0, do a text node. */
-      if(next > i) {
-        const sub = message.substring(i, next);
-        element.appendChild(document.createTextNode(sub));
-        i = next;
-      }
-      if(next_matcher !== null) {
-        i += next_matcher[1](next_matcher[2]);
-        next_matcher[2] = null;
-        next_matcher[3] = -1;
-      }
-    }
-  }
-
  this.addNewMessage = function(channel, date, author, message) {
    const view = this.getChannelView(channel);
    if(view === undefined)
@ -467,7 +363,8 @@ let shoutbox = new function() {
    messageElement.appendChild(authorElement);
    let messageContentElement = document.createElement("p");
    messageContentElement.classList.add("message-content");
-    this.messageToNode(message, messageContentElement);
+    const fmt = new IRCFormatter(message, messageContentElement);
+    fmt.messageToNode();
    messageElement.appendChild(messageContentElement);
    let dateElement = document.createElement("div");
    dateElement.classList.add("message-date");
@ -518,8 +415,137 @@ let shoutbox = new function() {
  }
 };

+/* IRC-to-HTML formatting.
+
+   This is less trivial than it sounds because of the stateful nature of IRC's
+   formatting system. Specific escapes enable, disable, toggle, reset formats,
+   not necessarily in any well-parenthesized order.
+
+   The bulk of this formatter consists of:
+   - Parsing interesting stuff (URLs, [links](...), etc.) with basic regular
+     expressions (goal: maximum flexibility)
+   - An internal node/group representation used to resolve stateful formats */
+class IRCFormatter {
+  /* The internal representation is a tree of Nodes (text, links, etc) and
+     Format changes, arranged by concatenation in Groups (sequences). Nodes can
+     contain other groups for nesting. The format application algorithm is a
+     dual top-down + bottom-up pass to collect formats and find the series of
+     tags needed to achieve the correct format. */
+
+  /* Start of a raw URL. Only the protocol is matched; fURL() reads the rest.
+     The `d` flag provides match.indices, which the handlers rely on. */
+  static rURL = /(https?:\/\/|ftp:\/\/|magnet:)/d;
+  /* Start of a "[text](protocol" link; fLink() reads the rest of the target
+     up to the closing parenthesis. */
+  static rLink = /\[([^\]]+)\]\((https?:\/\/|ftp:\/\/|magnet:)/d;
+  /* Inline code delimited by backticks. */
+  static rInlineCode = /`([^`]+)`/;
+
+  /* message: the string to format.
+     element: the DOM node that receives the generated children. */
+  constructor(message, element) {
+    this.message = message;
+    this.element = element;
+  }
+
+  /* Returns the index of the first character of `text`, scanning from
+     `start`, that ends a parenthesis-balanced run: an unbalanced ")", a
+     whitespace character when `stopAtSpace` is set, or text.length if neither
+     is found. */
+  static scanBalanced(text, start, stopAtSpace) {
+    let depth = 0;
+    let i = start;
+
+    while(i < text.length) {
+      const c = text[i];
+      if(stopAtSpace && /\s/.test(c))
+        break;
+      if(c === "(")
+        depth++;
+      else if(c === ")" && --depth < 0)
+        break;
+      i++;
+    }
+    return i;
+  }
+
+  /* Builds an <a> element pointing to `url`, labelled `text`, opening in a
+     new tab. */
+  static makeLink(url, text) {
+    const a = document.createElement("a");
+    a.href = url;
+    a.target = "_blank";
+    /* The URL comes from a chat message: don't give the opened page access
+       to this one through window.opener. */
+    a.rel = "noopener noreferrer";
+    a.appendChild(document.createTextNode(text));
+    return a;
+  }
+
+  /* Handlers. Each one receives a match object produced by its regex and the
+     element to append to; it appends the generated node and returns how many
+     characters of match.input it consumed, counted from the start of the
+     match. */
+
+  fURL(match, element) {
+    /* We've found the protocol, now read the arguments until whitespace or
+       unbalanced closing parenthesis */
+    const [start, protocolEnd] = match.indices[0];
+    let end = IRCFormatter.scanBalanced(match.input, protocolEnd, true);
+
+    /* Don't count the last character if it's a quote or comma */
+    if(end > protocolEnd && /[",]/.test(match.input[end-1]))
+      end--;
+
+    const url = match.input.substring(start, end);
+    element.appendChild(IRCFormatter.makeLink(url, url));
+    return url.length;
+  }
+
+  fLink(match, element) {
+    /* Read link while keeping balance parentheses. Compared to raw URLs, we
+       allow spaces, and also keep quotes/commas as last characters since
+       there is a reasonable delimiter */
+    const input = match.input;
+    const end = IRCFormatter.scanBalanced(input, match.indices[0][1], false);
+
+    const url = input.substring(match.indices[2][0], end);
+    element.appendChild(IRCFormatter.makeLink(url, match[1]));
+
+    /* Also consume the closing parenthesis when there is one; if the message
+       ends before the link is closed, everything up to the end is consumed. */
+    const closing = (end < input.length) ? 1 : 0;
+    return end + closing - match.indices[0][0];
+  }
+
+  fInlineCode(match, element) {
+    const code = document.createElement("code");
+    code.appendChild(document.createTextNode(match[1]));
+    element.appendChild(code);
+    return match[0].length;
+  }
+
+  /* Converts this.message into DOM nodes appended to this.element: text
+     between matches becomes text nodes, matches are passed to their handler. */
+  messageToNode() {
+    /* Matcher state: `match` is the pending match object (or null) and
+       `index` its position in the full message (-1 when there is none). */
+    const matchers = [
+      { regex: IRCFormatter.rURL,        handler: this.fURL,        match: null, index: -1 },
+      { regex: IRCFormatter.rLink,       handler: this.fLink,       match: null, index: -1 },
+      { regex: IRCFormatter.rInlineCode, handler: this.fInlineCode, match: null, index: -1 },
+    ];
+
+    /* Repeatedly find the next segment to convert. */
+    const message = this.message;
+    let i = 0;
+    while(i < message.length) {
+      let next = message.length;
+      let next_matcher = null;
+
+      /* Update the next matches for all regexes and find the one that matches
+         the earliest. A pending match that starts before i overlaps something
+         already consumed, so it is searched again from i. */
+      for(const m of matchers) {
+        if(m.index < i) {
+          m.regex.lastIndex = 0;
+          m.match = m.regex.exec(message.substring(i));
+          m.index = (m.match !== null) ? i + m.match.index : -1;
+        }
+        if(m.index >= 0 && m.index < next) {
+          next = m.index;
+          next_matcher = m;
+        }
+      }
+
+      /* Find the closest one. If it's not at offset 0, do a text node. */
+      if(next > i) {
+        const sub = message.substring(i, next);
+        this.element.appendChild(document.createTextNode(sub));
+        i = next;
+      }
+      if(next_matcher !== null) {
+        i += next_matcher.handler.call(this, next_matcher.match, this.element);
+        next_matcher.match = null;
+        next_matcher.index = -1;
+      }
+    }
+  }
+}
+
 /* We initialize the shoutbox once the SharedChat has finished its async init
-   *and* the DOMContentLoaded even has been fired. */
+   *and* the DOMContentLoaded event has been fired. */

 let sc = new SharedChat();
 const sc_init_promise = sc.init();