From 84ee1d9cd98339b803429e63dec47a650b3491f3 Mon Sep 17 00:00:00 2001 From: Tilman Vatteroth Date: Tue, 2 Nov 2021 08:15:33 +0100 Subject: [PATCH] Add dom purify (#1609) Signed-off-by: Tilman Vatteroth --- cypress/integration/linkSchemes.spec.ts | 32 +++------ package.json | 70 ++++++++++--------- .../hooks/dom-purifier-node-preprocessor.ts | 25 +++++++ .../use-convert-markdown-to-react-dom.ts | 7 +- .../process-reveal-comment-nodes.ts | 2 +- .../replace-legacy-slideshare-short-code.ts | 2 +- .../katex/katex-replacer.tsx | 27 +++---- .../link-replacer/link-replacer.tsx | 7 -- yarn.lock | 12 ++++ 9 files changed, 103 insertions(+), 81 deletions(-) create mode 100644 src/components/markdown-renderer/hooks/dom-purifier-node-preprocessor.ts diff --git a/cypress/integration/linkSchemes.spec.ts b/cypress/integration/linkSchemes.spec.ts index 82c09275a..e98dd7b0b 100644 --- a/cypress/integration/linkSchemes.spec.ts +++ b/cypress/integration/linkSchemes.spec.ts @@ -20,30 +20,22 @@ describe('markdown formatted links to', () => { it('note anchor references render as anchor link', () => { cy.setCodemirrorContent('[anchor](#anchor)') - cy.getMarkdownBody() - .find('a') - .should('have.attr', 'href', 'http://127.0.0.1:3001/n/test#anchor') + cy.getMarkdownBody().find('a').should('have.attr', 'href', 'http://127.0.0.1:3001/n/test#anchor') }) it('internal pages render as internal link', () => { cy.setCodemirrorContent('[internal](other-note)') - cy.getMarkdownBody() - .find('a') - .should('have.attr', 'href', 'http://127.0.0.1:3001/n/other-note') + cy.getMarkdownBody().find('a').should('have.attr', 'href', 'http://127.0.0.1:3001/n/other-note') }) it('data URIs do not render', () => { cy.setCodemirrorContent('[data](data:text/plain,evil)') - cy.getMarkdownBody() - .find('a') - .should('not.exist') + cy.getMarkdownBody().find('a').should('not.exist') }) it('javascript URIs do not render', () => { cy.setCodemirrorContent('[js](javascript:alert("evil"))') - cy.getMarkdownBody() - .find('a') - .should('not.exist') + cy.getMarkdownBody().find('a').should('not.exist') }) }) @@ -63,29 +55,21 @@ describe('HTML anchor element links to', () => { it('note anchor references render as anchor link', () => { cy.setCodemirrorContent('anchor') - cy.getMarkdownBody() - .find('a') - .should('have.attr', 'href', 'http://127.0.0.1:3001/n/test#anchor') + cy.getMarkdownBody().find('a').should('have.attr', 'href', 'http://127.0.0.1:3001/n/test#anchor') }) it('internal pages render as internal link', () => { cy.setCodemirrorContent('internal') - cy.getMarkdownBody() - .find('a') - .should('have.attr', 'href', 'http://127.0.0.1:3001/n/other-note') + cy.getMarkdownBody().find('a').should('have.attr', 'href', 'http://127.0.0.1:3001/n/other-note') }) it('data URIs do not render', () => { cy.setCodemirrorContent('data') - cy.getMarkdownBody() - .find('a') - .should('not.exist') + cy.getMarkdownBody().find('a').should('not.have.attr', 'href') }) it('javascript URIs do not render', () => { cy.setCodemirrorContent('js') - cy.getMarkdownBody() - .find('a') - .should('not.exist') + cy.getMarkdownBody().find('a').should('not.have.attr', 'href') }) }) diff --git a/package.json b/package.json index e0c04b581..a73172128 100644 --- a/package.json +++ b/package.json @@ -79,18 +79,52 @@ }, "dependencies": {}, "devDependencies": { + "@craco/craco": "6.4.0", + "@cypress/webpack-preprocessor": "5.9.1", + "@fontsource/source-sans-pro": "4.5.0", + "@hedgedoc/html-to-react": "1.1.0", + "@hedgedoc/markdown-it-image-size": "1.0.1", + "@hedgedoc/markdown-it-task-lists": "1.0.2", + "@matejmazur/react-katex": "3.1.3", + "@redux-devtools/core": "3.9.0", + "@testing-library/cypress": "8.0.1", + "@testing-library/jest-dom": "5.14.1", + "@testing-library/react": "12.1.2", + "@testing-library/user-event": "13.5.0", + "@types/codemirror": "5.60.5", + "@types/d3-graphviz": "2.6.7", + "@types/diff": "5.0.1", + "@types/dompurify": "2.3.1", + "@types/jest": "27.0.2", + "@types/js-yaml": "4.0.4", + "@types/luxon": "2.0.5", + "@types/markdown-it": "12.2.3", + "@types/markdown-it-container": "2.0.4", + "@types/markdown-it-plantuml": "1.4.1", + "@types/mermaid": "8.2.7", + "@types/node": "16.11.6", + "@types/react": "17.0.33", + "@types/react-bootstrap-typeahead": "5.1.8", + "@types/react-dom": "17.0.10", + "@types/react-router": "5.1.17", + "@types/react-router-bootstrap": "0.24.5", + "@types/react-router-dom": "5.3.2", + "@types/redux-devtools": "3.0.47", + "@types/sass": "1.43.0", + "@types/uuid": "8.3.1", + "@typescript-eslint/eslint-plugin": "5.2.0", + "@typescript-eslint/parser": "5.2.0", "abcjs": "6.0.0-beta.33", "bootstrap": "4.6.1", "codemirror": "5.63.3", "copy-webpack-plugin": "6.4.1", - "@craco/craco": "6.4.0", "cross-env": "7.0.3", "cypress": "7.7.0", "cypress-commands": "1.1.0", "cypress-file-upload": "5.0.8", - "@cypress/webpack-preprocessor": "5.9.1", "d3-graphviz": "3.2.0", "diff": "5.0.0", + "dompurify": "2.3.3", "emoji-picker-element": "1.8.2", "emoji-picker-element-data": "1.2.0", "eslint-config-prettier": "8.3.0", @@ -104,11 +138,7 @@ "fast-deep-equal": "3.1.3", "firacode": "5.2.0", "flowchart.js": "1.17.0", - "@fontsource/source-sans-pro": "4.5.0", "fork-awesome": "1.2.0", - "@hedgedoc/html-to-react": "1.1.0", - "@hedgedoc/markdown-it-image-size": "1.0.1", - "@hedgedoc/markdown-it-task-lists": "1.0.2", "highlight.js": "11.3.1", "http-server": "14.0.0", "i18next": "21.3.3", @@ -136,7 +166,6 @@ "markmap-common": "0.1.5", "markmap-lib": "0.11.6", "markmap-view": "0.2.6", - "@matejmazur/react-katex": "3.1.3", "mermaid": "8.13.3", "optional-js": "2.3.0", "prettier": "2.4.1", @@ -155,41 +184,14 @@ "react-scripts": "4.0.3", "react-use": "17.3.1", "redux": "4.1.2", - "@redux-devtools/core": "3.9.0", "redux-devtools-extension": "2.13.9", "reveal.js": "4.1.3", "sanitize-filename": "1.6.3", "sass": "1.43.4", - "@testing-library/cypress": "8.0.1", - "@testing-library/jest-dom": "5.14.1", - "@testing-library/react": "12.1.2", - "@testing-library/user-event": "13.5.0", "ts-loader": "9.2.6", "ts-mockery": "1.2.0", "twemoji-colr-font": "0.0.4", - "@types/codemirror": "5.60.5", "typescript": "4.4.4", - "@typescript-eslint/eslint-plugin": "5.2.0", - "@typescript-eslint/parser": "5.2.0", - "@types/d3-graphviz": "2.6.7", - "@types/diff": "5.0.1", - "@types/jest": "27.0.2", - "@types/js-yaml": "4.0.4", - "@types/luxon": "2.0.5", - "@types/markdown-it": "12.2.3", - "@types/markdown-it-container": "2.0.4", - "@types/markdown-it-plantuml": "1.4.1", - "@types/mermaid": "8.2.7", - "@types/node": "16.11.6", - "@types/react": "17.0.33", - "@types/react-bootstrap-typeahead": "5.1.8", - "@types/react-dom": "17.0.10", - "@types/react-router": "5.1.17", - "@types/react-router-bootstrap": "0.24.5", - "@types/react-router-dom": "5.3.2", - "@types/redux-devtools": "3.0.47", - "@types/sass": "1.43.0", - "@types/uuid": "8.3.1", "use-resize-observer": "8.0.0", "uuid": "8.3.2", "vega": "5.21.0", diff --git a/src/components/markdown-renderer/hooks/dom-purifier-node-preprocessor.ts b/src/components/markdown-renderer/hooks/dom-purifier-node-preprocessor.ts new file mode 100644 index 000000000..2c5746bc2 --- /dev/null +++ b/src/components/markdown-renderer/hooks/dom-purifier-node-preprocessor.ts @@ -0,0 +1,25 @@ +/* + * SPDX-FileCopyrightText: 2021 The HedgeDoc developers (see AUTHORS file) + * + * SPDX-License-Identifier: AGPL-3.0-only + */ + +import type { Document } from 'domhandler' +import render from 'dom-serializer' +import DOMPurify from 'dompurify' +import { parseDocument } from 'htmlparser2' + +const customTags = ['app-linemarker', 'app-katex', 'app-gist', 'app-youtube', 'app-vimeo', 'app-asciinema'] + +/** + * Sanitizes the given {@link Document document}. + * + * @param document The dirty document + * @return the sanitized Document + */ +export const domPurifierNodePreprocessor = (document: Document): Document => { + const sanitizedHtml = DOMPurify.sanitize(render(document), { + ADD_TAGS: customTags + }) + return parseDocument(sanitizedHtml) +} diff --git a/src/components/markdown-renderer/hooks/use-convert-markdown-to-react-dom.ts b/src/components/markdown-renderer/hooks/use-convert-markdown-to-react-dom.ts index 0803e3fa7..f47229e98 100644 --- a/src/components/markdown-renderer/hooks/use-convert-markdown-to-react-dom.ts +++ b/src/components/markdown-renderer/hooks/use-convert-markdown-to-react-dom.ts @@ -11,6 +11,7 @@ import convertHtmlToReact from '@hedgedoc/html-to-react' import type { Document } from 'domhandler' import { NodeToReactTransformer } from '../utils/node-to-react-transformer' import { LineIdMapper } from '../utils/line-id-mapper' +import { domPurifierNodePreprocessor } from './dom-purifier-node-preprocessor' /** * Renders markdown code into react elements @@ -40,9 +41,13 @@ export const useConvertMarkdownToReactDom = ( return useMemo(() => { const html = markdownIt.render(markdownCode) + return convertHtmlToReact(html, { transform: (node, index) => htmlToReactTransformer.translateNodeToReactElement(node, index), - preprocessNodes: preprocessNodes + preprocessNodes: (document: Document): Document => { + const processedDocument = preprocessNodes ? preprocessNodes(document) : document + return domPurifierNodePreprocessor(processedDocument) + } }) }, [htmlToReactTransformer, markdownCode, markdownIt, preprocessNodes]) } diff --git a/src/components/markdown-renderer/process-reveal-comment-nodes.ts b/src/components/markdown-renderer/process-reveal-comment-nodes.ts index 444daba75..757f377a3 100644 --- a/src/components/markdown-renderer/process-reveal-comment-nodes.ts +++ b/src/components/markdown-renderer/process-reveal-comment-nodes.ts @@ -9,7 +9,7 @@ import { Logger } from '../../utils/logger' const log = new Logger('reveal.js > Comment Node Preprocessor') const revealCommandSyntax = /^\s*\.(\w*):(.*)$/g -const dataAttributesSyntax = /\s*([\w-]*)=(?:"((?:[^"\\]|\\"|\\)*)"|'([^']*)')/g +const dataAttributesSyntax = /\s*(data-[\w-]*|class)=(?:"((?:[^"\\]|\\"|\\)*)"|'([^']*)')/g /** * Travels through the given {@link Document}, searches for reveal command comments and applies them. diff --git a/src/components/markdown-renderer/regex-plugins/replace-legacy-slideshare-short-code.ts b/src/components/markdown-renderer/regex-plugins/replace-legacy-slideshare-short-code.ts index e6db9b6e6..cfafa860b 100644 --- a/src/components/markdown-renderer/regex-plugins/replace-legacy-slideshare-short-code.ts +++ b/src/components/markdown-renderer/regex-plugins/replace-legacy-slideshare-short-code.ts @@ -14,7 +14,7 @@ export const legacySlideshareShortCode: MarkdownIt.PluginSimple = (markdownIt) = name: 'legacy-slideshare-short-code', regex: finalRegex, replace: (match: string) => { - return `https://www.slideshare.net/${match}` + return `https://www.slideshare.net/${match}` } }) } diff --git a/src/components/markdown-renderer/replace-components/katex/katex-replacer.tsx b/src/components/markdown-renderer/replace-components/katex/katex-replacer.tsx index ed1d52361..948b69e71 100644 --- a/src/components/markdown-renderer/replace-components/katex/katex-replacer.tsx +++ b/src/components/markdown-renderer/replace-components/katex/katex-replacer.tsx @@ -9,7 +9,7 @@ import { isTag } from 'domhandler' import type MarkdownIt from 'markdown-it' import mathJax from 'markdown-it-mathjax' import React from 'react' -import { ComponentReplacer } from '../component-replacer' +import { ComponentReplacer, DO_NOT_REPLACE } from '../component-replacer' import './katex.scss' /** @@ -18,23 +18,24 @@ import './katex.scss' * @param node the node to check * @return The given node if it is a KaTeX block element, undefined otherwise. */ -const getNodeIfKatexBlock = (node: Element): Element | undefined => { +const containsKatexBlock = (node: Element): Element | undefined => { if (node.name !== 'p' || !node.children || node.children.length === 0) { return } return node.children.filter(isTag).find((subnode) => { - return subnode.name === 'app-katex' && subnode.attribs?.inline === undefined + return isKatexTag(subnode, false) ? subnode : undefined }) } /** - * Checks if the given node is a KaTeX inline element. + * Checks if the given node is a KaTeX element. * * @param node the node to check - * @return The given node if it is a KaTeX inline element, undefined otherwise. + * @param expectedInline defines if the found katex element is expected to be an inline or block element. + * @return {@code true} if the given node is a katex element. */ -const getNodeIfInlineKatex = (node: Element): Element | undefined => { - return node.name === 'app-katex' && node.attribs?.inline !== undefined ? node : undefined +const isKatexTag = (node: Element, expectedInline: boolean) => { + return node.name === 'app-katex' && (node.attribs?.['data-inline'] !== undefined) === expectedInline } const KaTeX = React.lazy(() => import(/* webpackChunkName: "katex" */ '@matejmazur/react-katex')) @@ -46,18 +47,18 @@ export class KatexReplacer extends ComponentReplacer { public static readonly markdownItPlugin: MarkdownIt.PluginSimple = mathJax({ beforeMath: '', afterMath: '', - beforeInlineMath: '', + beforeInlineMath: '', afterInlineMath: '', beforeDisplayMath: '', afterDisplayMath: '' }) public replace(node: Element): React.ReactElement | undefined { - const katex = getNodeIfKatexBlock(node) || getNodeIfInlineKatex(node) - if (katex?.children && katex.children[0]) { - const mathJaxContent = ComponentReplacer.extractTextChildContent(katex) - const isInline = katex.attribs?.inline !== undefined - return + if (!(isKatexTag(node, true) || containsKatexBlock(node)) || node.children?.[0] === undefined) { + return DO_NOT_REPLACE } + const latexContent = ComponentReplacer.extractTextChildContent(node) + const isInline = !!node.attribs?.['data-inline'] + return } } diff --git a/src/components/markdown-renderer/replace-components/link-replacer/link-replacer.tsx b/src/components/markdown-renderer/replace-components/link-replacer/link-replacer.tsx index 57b6a9079..b12b0231d 100644 --- a/src/components/markdown-renderer/replace-components/link-replacer/link-replacer.tsx +++ b/src/components/markdown-renderer/replace-components/link-replacer/link-replacer.tsx @@ -35,14 +35,7 @@ export class LinkReplacer extends ComponentReplacer { } const url = node.attribs.href.trim() - - // eslint-disable-next-line no-script-url - if (url.startsWith('data:') || url.startsWith('javascript:') || url.startsWith('vbscript:')) { - return {node.attribs.href} - } - const isJumpMark = url.substr(0, 1) === '#' - const id = url.substr(1) try { diff --git a/yarn.lock b/yarn.lock index deee4858d..2f9d91a0c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2275,6 +2275,13 @@ resolved "https://registry.yarnpkg.com/@types/diff/-/diff-5.0.1.tgz#9c9b9a331d4e41ccccff553f5d7ef964c6cf4042" integrity sha512-XIpxU6Qdvp1ZE6Kr3yrkv1qgUab0fyf4mHYvW8N3Bx3PCsbN6or1q9/q72cv5jIFWolaGH08U9XyYoLLIykyKQ== +"@types/dompurify@2.3.1": + version "2.3.1" + resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-2.3.1.tgz#2934adcd31c4e6b02676f9c22f9756e5091c04dd" + integrity sha512-YJth9qa0V/E6/XPH1Jq4BC8uCMmO8V1fKWn8PCvuZcAhMn7q0ez9LW6naQT04UZzjFfAPhyRMZmI2a2rbMlEFA== + dependencies: + "@types/trusted-types" "*" + "@types/eslint@^7.2.6": version "7.28.2" resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.28.2.tgz#0ff2947cdd305897c52d5372294e8c76f351db68" @@ -2596,6 +2603,11 @@ dependencies: "@types/jest" "*" +"@types/trusted-types@*": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.2.tgz#fc25ad9943bcac11cceb8168db4f275e0e72e756" + integrity sha512-F5DIZ36YVLE+PN+Zwws4kJogq47hNgX3Nx6WyDJ3kcplxyke3XIzB8uK5n/Lpm1HBsbGzd6nmGehL8cPekP+Tg== + "@types/uglify-js@*": version "3.13.1" resolved "https://registry.yarnpkg.com/@types/uglify-js/-/uglify-js-3.13.1.tgz#5e889e9e81e94245c75b6450600e1c5ea2878aea"