From 3f7f182a3ca1033fec0a42805b45c59d00a41d7e Mon Sep 17 00:00:00 2001 From: egeekial Date: Wed, 3 Jun 2026 15:59:25 -0400 Subject: [PATCH] [newsfeed] add allowBasicHtmlTags option for basic emphasis Render a strict allowlist of basic formatting tags (b, strong, i, em, u) in news titles and descriptions, while neutralizing all other HTML. Feeds such as The Atlantic encode emphasis as entities (<em>), which html-to-text decoded to a literal string that the template then auto-escaped, so the raw tag was shown on screen. The new opt-in allowBasicHtmlTags option (default false) sanitizes both fields by escaping everything and restoring only the exact, attribute-free allowlisted tags, so the result is safe to render and arbitrary HTML/script injection is impossible. Adds unit tests for the sanitizer and an e2e test covering rendering and an injection attempt. Co-Authored-By: Claude Opus 4.8 --- defaultmodules/newsfeed/newsfeed.js | 3 +- defaultmodules/newsfeed/newsfeed.njk | 12 +-- defaultmodules/newsfeed/newsfeedfetcher.js | 80 ++++++++++++++++--- defaultmodules/newsfeed/node_helper.js | 2 +- tests/configs/modules/newsfeed/basic_html.js | 28 +++++++ tests/e2e/modules/newsfeed_spec.js | 26 ++++++ tests/mocks/newsfeed_basic_html.xml | 15 ++++ .../default/newsfeed/newsfeedfetcher_spec.js | 52 ++++++++++++ 8 files changed, 199 insertions(+), 19 deletions(-) create mode 100644 tests/configs/modules/newsfeed/basic_html.js create mode 100644 tests/mocks/newsfeed_basic_html.xml create mode 100644 tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js diff --git a/defaultmodules/newsfeed/newsfeed.js b/defaultmodules/newsfeed/newsfeed.js index fa1476faf5..d593b4c086 100644 --- a/defaultmodules/newsfeed/newsfeed.js +++ b/defaultmodules/newsfeed/newsfeed.js @@ -33,7 +33,8 @@ Module.register("newsfeed", { prohibitedWords: [], scrollLength: 500, logFeedWarnings: false, - dangerouslyDisableAutoEscaping: false + dangerouslyDisableAutoEscaping: false, + allowBasicHtmlTags: false }, getUrlPrefix (item) { diff --git a/defaultmodules/newsfeed/newsfeed.njk b/defaultmodules/newsfeed/newsfeed.njk index 9eacc261c2..57592c5971 100644 --- a/defaultmodules/newsfeed/newsfeed.njk +++ b/defaultmodules/newsfeed/newsfeed.njk @@ -45,13 +45,13 @@ {% if config.showPublishDate %}{{ item.publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% else %} - {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% endif %}
{% endif %} @@ -68,13 +68,13 @@ {% if config.showPublishDate %}{{ publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% else %} - {{ escapeText(description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% endif %}
{% endif %} diff --git a/defaultmodules/newsfeed/newsfeedfetcher.js b/defaultmodules/newsfeed/newsfeedfetcher.js index 1a5f421ab8..1942bba64b 100644 --- a/defaultmodules/newsfeed/newsfeedfetcher.js +++ b/defaultmodules/newsfeed/newsfeedfetcher.js @@ -6,6 +6,23 @@ const { htmlToText } = require("html-to-text"); const Log = require("logger"); const HTTPFetcher = require("#http_fetcher"); +// Inline formatting tags that are safe to render: bold, italic and underline. +// These never carry attributes once sanitized, so they cannot be used for injection. +const ALLOWED_TAGS = ["b", "strong", "i", "em", "u"]; + +// html-to-text formatter that re-emits an allowed inline tag around its content, +// so feeds that send real / elements keep their emphasis. +const keepTagFormatter = (elem, walk, builder, formatOptions) => { + builder.addLiteral(`<${formatOptions.tagName}>`); + walk(elem.children, builder); + builder.addLiteral(``); +}; + +const allowedTagSelectors = ALLOWED_TAGS.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } })); + +// Matches only the exact, attribute-free opening/closing allowlisted tags after escaping. +const restoreAllowedTags = new RegExp(`<(/?(?:${ALLOWED_TAGS.join("|")}))>`, "g"); + /** * NewsfeedFetcher - Fetches and parses RSS/Atom feed data * Uses HTTPFetcher for HTTP handling with intelligent error handling @@ -20,12 +37,14 @@ class NewsfeedFetcher { * @param {string} encoding - Encoding of the feed (e.g., 'UTF-8') * @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article * @param {boolean} useCorsProxy - If true cors proxy is used for article url's + * @param {boolean} allowBasicHtmlTags - If true keep basic formatting tags (bold/italic/underline) in title and description */ - constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) { + constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowBasicHtmlTags = false) { this.url = url; this.encoding = encoding; this.logFeedWarnings = logFeedWarnings; this.useCorsProxy = useCorsProxy; + this.allowBasicHtmlTags = allowBasicHtmlTags; this.items = []; this.fetchFailedCallback = () => {}; this.itemsReceivedCallback = () => {}; @@ -44,6 +63,37 @@ class NewsfeedFetcher { this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo)); } + /** + * Sanitizes a feed string, keeping only a strict allowlist of basic + * formatting tags (bold, italic, underline) and neutralizing everything else. + * + * The approach is allowlist-only and therefore safe to render unescaped: + * html-to-text first strips all real markup (scripts, links, images, …) and + * decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact, + * attribute-free allowlisted tags are restored. No attributes, event handlers, + * or other tags can survive, so arbitrary HTML/script injection is impossible. + * @param {string} html - The raw title or description from the feed. + * @returns {string} Safe HTML containing at most the allowlisted formatting tags. + */ + static sanitizeBasicHtml (html) { + const text = htmlToText(html, { + wordwrap: false, + formatters: { keepTag: keepTagFormatter }, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" }, + ...allowedTagSelectors + ] + }); + + return text + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replace(restoreAllowedTags, "<$1>"); + } + /** * Creates a parse error info object * @param {string} message - Error message @@ -78,22 +128,30 @@ class NewsfeedFetcher { const url = item.url || item.link || ""; if (title && pubdate) { - // Convert HTML entities, codes and tag - description = htmlToText(description, { - wordwrap: false, - selectors: [ - { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, - { selector: "br", format: "inlineSurround", options: { prefix: " " } }, - { selector: "img", format: "skip" } - ] - }); + let displayTitle = title; + if (this.allowBasicHtmlTags) { + // Keep basic formatting (bold/italic/underline) in both fields, strip everything else + description = NewsfeedFetcher.sanitizeBasicHtml(description); + displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title); + } else { + // Convert HTML entities, codes and tag + description = htmlToText(description, { + wordwrap: false, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" } + ] + }); + } this.items.push({ - title, + title: displayTitle, description, pubdate, url, useCorsProxy: this.useCorsProxy, + // Hash on the original title so the dedup identity is stable regardless of allowBasicHtmlTags hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex") }); } else if (this.logFeedWarnings) { diff --git a/defaultmodules/newsfeed/node_helper.js b/defaultmodules/newsfeed/node_helper.js index cede93f698..665da970fd 100644 --- a/defaultmodules/newsfeed/node_helper.js +++ b/defaultmodules/newsfeed/node_helper.js @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({ let fetcher; if (typeof this.fetchers[url] === "undefined") { Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`); - fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy); + fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowBasicHtmlTags); fetcher.onReceive(() => { this.broadcastFeeds(); diff --git a/tests/configs/modules/newsfeed/basic_html.js b/tests/configs/modules/newsfeed/basic_html.js new file mode 100644 index 0000000000..a1edac4980 --- /dev/null +++ b/tests/configs/modules/newsfeed/basic_html.js @@ -0,0 +1,28 @@ +let config = { + address: "0.0.0.0", + ipWhitelist: [], + timeFormat: 12, + + modules: [ + { + module: "newsfeed", + position: "bottom_bar", + config: { + feeds: [ + { + title: "Formatting Feed", + url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml" + } + ], + showDescription: true, + truncDescription: false, + allowBasicHtmlTags: true + } + } + ] +}; + +/*************** DO NOT EDIT THE LINE BELOW ***************/ +if (typeof module !== "undefined") { + module.exports = config; +} diff --git a/tests/e2e/modules/newsfeed_spec.js b/tests/e2e/modules/newsfeed_spec.js index b68b6ea5a6..88a6205bfd 100644 --- a/tests/e2e/modules/newsfeed_spec.js +++ b/tests/e2e/modules/newsfeed_spec.js @@ -45,6 +45,32 @@ const runTests = () => { }); }); + describe("Basic HTML tags", () => { + beforeAll(async () => { + await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js"); + await helpers.getDocument(); + page = helpers.getPage(); + }); + + it("should render allowlisted formatting tags in title and description", async () => { + await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible(); + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML(); + expect(titleHtml).toContain(""); + }); + + it("should strip disallowed HTML and not execute injected scripts", async () => { + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).not.toContain(" window.__newsfeedXss); + expect(xss).toBeUndefined(); + }); + }); + describe("Invalid configuration", () => { beforeAll(async () => { await helpers.startApplication("tests/configs/modules/newsfeed/incorrect_url.js"); diff --git a/tests/mocks/newsfeed_basic_html.xml b/tests/mocks/newsfeed_basic_html.xml new file mode 100644 index 0000000000..342564879e --- /dev/null +++ b/tests/mocks/newsfeed_basic_html.xml @@ -0,0 +1,15 @@ + + + + Formatting Feed + http://localhost:8080 + Feed used to test the allowBasicHtmlTags option. + + News <em>Flash</em> + http://localhost:8080/article + Tue, 20 Sep 2016 11:16:08 +0000 + http://localhost:8080/?p=1 + Italic and Bold and Underlined text.

]]>
+
+
+
diff --git a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js new file mode 100644 index 0000000000..0379bc32be --- /dev/null +++ b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js @@ -0,0 +1,52 @@ +const defaults = require("../../../../../js/defaults"); + +const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`); + +const { sanitizeBasicHtml } = NewsfeedFetcher; + +describe("NewsfeedFetcher.sanitizeBasicHtml", () => { + it("keeps real basic formatting tags", () => { + expect(sanitizeBasicHtml("a b c d e")) + .toBe("a b c d e"); + }); + + it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => { + // Feeds like theatlantic.com ship emphasis as escaped entities + expect(sanitizeBasicHtml("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean"); + }); + + it("handles emphasis inside titles regardless of how the parser delivers it", () => { + // The Atlantic uses in titles, e.g. "That's Enough, Euphoria" + const expected = "That’s Enough, Euphoria"; + expect(sanitizeBasicHtml("That’s Enough, Euphoria")).toBe(expected); + expect(sanitizeBasicHtml("That’s Enough, <em>Euphoria</em>")).toBe(expected); + }); + + it("strips attributes from allowed tags", () => { + const result = sanitizeBasicHtml("bold"); + expect(result).toBe("bold"); + expect(result).not.toContain("onclick"); + expect(result).not.toContain("class"); + }); + + it("neutralizes script tags", () => { + expect(sanitizeBasicHtml("hello")).not.toContain(" { + const result = sanitizeBasicHtml("link

title

"); + expect(result).not.toContain("onerror"); + expect(result).not.toContain("href"); + expect(result).not.toContain("

"); + expect(result).toContain("link"); + expect(result.toLowerCase()).toContain("title"); + }); + + it("escapes bare HTML special characters in plain text", () => { + expect(sanitizeBasicHtml("Fish & Chips for < 5")).toBe("Fish & Chips for < 5"); + }); +});