From cf319b61b40bb54dddbed3fe655376dec5649307 Mon Sep 17 00:00:00 2001 From: Alf Eaton Date: Tue, 29 Aug 2023 11:03:54 +0100 Subject: [PATCH] [visual] Improved pasted HTML handling (#14384) * Replace non-breaking spaces added by Chrome on copy * Ignore text/html from VS Code * Improve table border handling * Remove unused cy.spy * Map em to textit and strong to textbf * Left-align table cells by default * Remove "justify" mapping * Detect border on table row * Remove protection for special characters in pasted HTML * If the only pasted HTML content is in a code block, use the plain text version * Enable paste-html feature in Storybook * Improve table handling GitOrigin-RevId: a912aa1fc659089451103e63c5d0fd3ae2a30627 --- .../extensions/visual/paste-html.ts | 222 ++++++++++++------ .../source-editor/source-editor.stories.tsx | 1 + ...demirror-editor-visual-paste-html.spec.tsx | 182 ++++++++++---- 3 files changed, 293 insertions(+), 112 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts index 96da067e5b..8b9447c5da 100644 --- a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts +++ b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts @@ -20,6 +20,14 @@ export const pasteHtml = Prec.highest( return false } + // ignore text/html from VS Code + if ( + clipboardData.types.includes('application/vnd.code.copymetadata') || + clipboardData.types.includes('vscode-editor-data') + ) { + return false + } + const html = clipboardData.getData('text/html').trim() if (html.length === 0) { @@ -28,7 +36,15 @@ export const pasteHtml = Prec.highest( // convert the HTML to LaTeX try { - const latex = htmlToLaTeX(html) + const parser = new DOMParser() + const { documentElement } = parser.parseFromString(html, 'text/html') + + // if the only content is in a code block, use the plain text version + if (onlyCode(documentElement)) { + return false + } + + const latex = htmlToLaTeX(documentElement) view.dispatch( view.state.changeByRange(range => { @@ -59,19 +75,22 @@ const removeUnwantedElements = ( } } -const htmlToLaTeX = (html: string) => { - const parser = new DOMParser() - const { documentElement } = parser.parseFromString(html, 'text/html') +// return true if the text content of the first element +// is the same as the text content of the whole document element +const onlyCode = (documentElement: HTMLElement) => + documentElement.querySelector('code')?.textContent?.trim() === + documentElement.textContent?.trim() +const htmlToLaTeX = (documentElement: HTMLElement) => { // remove style elements removeUnwantedElements(documentElement, 'style') + // replace non-breaking spaces added by Chrome on copy + processWhitespace(documentElement) + // pre-process table elements processTables(documentElement) - // protect special characters in non-LaTeX text nodes - protectSpecialCharacters(documentElement) - processMatchedElements(documentElement) const text = documentElement.textContent @@ -84,36 +103,15 @@ const htmlToLaTeX = (html: string) => { return text.replaceAll(/\n{2,}/g, '\n\n') } -const protectSpecialCharacters = (documentElement: HTMLElement) => { - for (const element of documentElement.childNodes) { - const text = element.textContent - if (text) { - // if there are no code blocks, use backslash as an indicator of LaTeX code that shouldn't be protected - if ( - element instanceof HTMLElement && - !element.querySelector('code') && - text.includes('\\') - ) { - continue - } +const processWhitespace = (documentElement: HTMLElement) => { + const walker = document.createTreeWalker( + documentElement, + NodeFilter.SHOW_TEXT + ) - const walker = document.createTreeWalker(element, NodeFilter.SHOW_TEXT) - - for (let node = walker.nextNode(); node; node = walker.nextNode()) { - const text = node.textContent - if (text) { - // leave text that's in a code block - if (node.parentElement?.closest('code')) { - continue - } - - // replace non-backslash-prefixed characters - node.textContent = text.replaceAll( - /(^|[^\\])([#$%&~_^\\{}])/g, - (_match, prefix: string, char: string) => `${prefix}\\${char}` - ) - } - } + for (let node = walker.nextNode(); node; node = walker.nextNode()) { + if (node.textContent === ' ') { + node.textContent = ' ' } } } @@ -180,40 +178,65 @@ const processTables = (element: HTMLElement) => { } } +const cellAlignment = new Map([ + ['left', 'l'], + ['center', 'c'], + ['right', 'r'], +]) + const tabular = (element: HTMLTableElement) => { - const options = [] + const definitions: Array<{ + alignment: string + borderLeft: boolean + borderRight: boolean + }> = [] - // NOTE: only analysing cells in the first row - const row = element.querySelector('tr') - - if (row) { - // TODO: look for horizontal borders and insert \hline (or \toprule, \midrule, \bottomrule etc)? + const rows = element.querySelectorAll('tr') + for (const row of rows) { const cells = [...row.childNodes].filter( element => element.nodeName === 'TD' || element.nodeName === 'TH' - ) as Array + ) as Array + + let index = 0 for (const cell of cells) { - const { borderLeft, textAlign, borderRight } = cell.style + // NOTE: reading the alignment and borders from the first cell definition in each column + if (definitions[index] === undefined) { + const { textAlign, borderLeftStyle, borderRightStyle } = cell.style - if (borderLeft && borderLeft !== 'none') { - // avoid duplicating when both left and right borders are defined - if (options.length === 0 || options[options.length - 1] !== '|') { - options.push('|') + definitions[index] = { + alignment: textAlign, + borderLeft: visibleBorderStyle(borderLeftStyle), + borderRight: visibleBorderStyle(borderRightStyle), } } - - options.push( - textAlign === 'left' ? 'l' : textAlign === 'right' ? 'r' : 'c' - ) - - if (borderRight && borderRight !== 'none') { - options.push('|') - } + index += Number(cell.getAttribute('colspan') ?? 1) } } - return options.join(' ') + for (let index = 0; index <= definitions.length; index++) { + // fill in missing definitions + const item = definitions[index] || { + alignment: 'left', + borderLeft: false, + borderRight: false, + } + + // remove left border if previous column had a right border + if (item.borderLeft && index > 0 && definitions[index - 1]?.borderRight) { + item.borderLeft = false + } + } + + return definitions + .flatMap(definition => [ + definition.borderLeft ? '|' : '', + cellAlignment.get(definition.alignment) ?? 'l', + definition.borderRight ? '|' : '', + ]) + .filter(Boolean) + .join(' ') } const listDepth = ( @@ -255,7 +278,50 @@ const hasContent = (element: HTMLElement): boolean => { return Boolean(element.textContent && element.textContent.trim().length > 0) } -export const selectors = [ +type BorderStyle = + | 'borderTopStyle' + | 'borderRightStyle' + | 'borderBottomStyle' + | 'borderLeftStyle' + +const visibleBorderStyle = (style: CSSStyleDeclaration[BorderStyle]): boolean => + !!style && style !== 'none' && style !== 'hidden' + +const rowHasBorderStyle = ( + element: HTMLTableRowElement, + style: BorderStyle +): boolean => { + if (visibleBorderStyle(element.style[style])) { + return true + } + + const cells = element.querySelectorAll('th,td') + + return [...cells].every(cell => visibleBorderStyle(cell.style[style])) +} + +const isTableRowElement = ( + element: Element | null +): element is HTMLTableRowElement => element?.tagName === 'TR' + +const nextRowHasBorderStyle = ( + element: HTMLTableRowElement, + style: BorderStyle +) => { + const { nextElementSibling } = element + return ( + isTableRowElement(nextElementSibling) && + rowHasBorderStyle(nextElementSibling, style) + ) +} + +const startMulticolumn = (element: HTMLTableCellElement): string => { + const colspan = element.getAttribute('colspan') ?? 1 + const alignment = cellAlignment.get(element.style.textAlign) ?? 'l' + return `\\multicolumn{${Number(colspan)}}{${alignment}}{` +} + +const selectors = [ createSelector({ selector: 'b', match: element => @@ -273,6 +339,12 @@ export const selectors = [ end: () => '}', inside: true, }), + createSelector({ + selector: 'strong', + match: element => hasContent(element), + start: () => '\\textbf{', + end: () => '}', + }), createSelector({ selector: 'i', match: element => @@ -287,6 +359,12 @@ export const selectors = [ start: () => '\\textit{', end: () => '}', }), + createSelector({ + selector: 'em', + match: element => hasContent(element), + start: () => '\\textit{', + end: () => '}', + }), createSelector({ selector: 'sup', match: element => hasContent(element), @@ -370,13 +448,13 @@ export const selectors = [ }), createSelector({ selector: '.ol-table-wrap', - start: element => `\n\n\\begin{table}\n\\centering\n`, + start: () => `\n\n\\begin{table}\n\\centering\n`, end: () => `\n\\end{table}\n\n`, }), createSelector({ selector: 'table', - start: element => `\n\\begin{tabular}{${tabular(element)}}\n`, - end: () => `\n\\end{tabular}\n`, + start: element => `\n\\begin{tabular}{${tabular(element)}}`, + end: () => `\\end{tabular}\n`, }), createSelector({ selector: 'thead', @@ -395,14 +473,22 @@ export const selectors = [ }), createSelector({ selector: 'tr', - match: element => element.nextElementSibling?.nodeName === 'TR', - end: () => `\n`, + start: element => { + const borderTop = rowHasBorderStyle(element, 'borderTopStyle') + return borderTop ? '\\hline\n' : '' + }, + end: element => { + const borderBottom = rowHasBorderStyle(element, 'borderBottomStyle') + return borderBottom && !nextRowHasBorderStyle(element, 'borderTopStyle') + ? '\n\\hline\n' + : '\n' + }, }), createSelector({ selector: 'tr > td:not(:last-child), tr > th:not(:last-child)', - start: element => { + start: (element: HTMLTableCellElement) => { const colspan = element.getAttribute('colspan') - return colspan ? `\\multicolumn{${Number(colspan)}}{` : '' + return colspan ? startMulticolumn(element) : '' }, end: element => { const colspan = element.getAttribute('colspan') @@ -411,13 +497,13 @@ export const selectors = [ }), createSelector({ selector: 'tr > td:last-child, tr > th:last-child', - start: element => { + start: (element: HTMLTableCellElement) => { const colspan = element.getAttribute('colspan') - return colspan ? `\\multicolumn{${Number(colspan)}}{` : '' + return colspan ? startMulticolumn(element) : '' }, end: element => { const colspan = element.getAttribute('colspan') - return colspan ? `} \\\\` : ` \\\\` + return colspan ? `} \\\\` : ` \\\\` }, }), createSelector({ diff --git a/services/web/frontend/stories/source-editor/source-editor.stories.tsx b/services/web/frontend/stories/source-editor/source-editor.stories.tsx index 267b421888..ad71f54158 100644 --- a/services/web/frontend/stories/source-editor/source-editor.stories.tsx +++ b/services/web/frontend/stories/source-editor/source-editor.stories.tsx @@ -99,6 +99,7 @@ export const Visual = (args: any, { globals: { theme } }: any) => { 'ol-mathJax3Path': 'https://unpkg.com/mathjax@3.2.2/es5/tex-svg-full.js', 'ol-splitTestVariants': { 'figure-modal': 'enabled', + 'paste-html': 'enabled', 'table-generator': 'enabled', }, }) diff --git a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx index b64e8772ab..eb5aad1127 100644 --- a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx +++ b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx @@ -55,7 +55,6 @@ describe(' paste HTML in Visual mode', function () { const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should('have.text', ' foo bar') @@ -69,14 +68,13 @@ describe(' paste HTML in Visual mode', function () { const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should('have.text', ' foo bar') cy.get('.ol-cm-item').should('have.length', 2) }) - it('handles a pasted simple table', function () { + it('handles a pasted table', function () { mountEditor() const data = @@ -84,29 +82,80 @@ describe(' paste HTML in Visual mode', function () { const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should( 'have.text', - '\\begin{tabular}{c c}foo & bar ↩\\end{tabular}' + '\\begin{tabular}{l l}foo & bar ↩\\end{tabular}' ) }) - it('handles a pasted simple table with borders', function () { + it('handles a pasted table with cell borders', function () { mountEditor() const data = - '
foobar
' + '
foobar
' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should( 'have.text', - '\\begin{tabular}{| c | c |}foo & bar ↩\\end{tabular}' + '\\begin{tabular}{| l | l |}\\hlinefoo & bar ↩\\hline\\end{tabular}' + ) + }) + + it('handles a pasted table with row borders', function () { + mountEditor() + + const data = + '
foobar
' + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + '\\begin{tabular}{l l}\\hlinefoo & bar ↩\\hline\\end{tabular}' + ) + }) + + it('handles a pasted table with adjacent borders', function () { + mountEditor() + + const data = [ + '', + '', + '', + '', + '
foobar
foobar
foobar
', + ].join('\n') + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + '\\begin{tabular}{| l | l |}\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hline\\end{tabular}' + ) + }) + + it('handles a pasted table with alignment', function () { + mountEditor() + + const data = + '
foofoofoofoofoo
' + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + '\\begin{tabular}{l l c r l}foo & foo & foo & foo & foo ↩\\end{tabular}' ) }) @@ -117,18 +166,38 @@ describe(' paste HTML in Visual mode', function () { ``, ``, ``, - ``, + ``, `
testtesttest
testtest
testtest
testtest
`, ].join('') const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should( 'have.text', - '\\begin{tabular}{c c c}test & test & test ↩\\multicolumn{2}{test} & test ↩test & \\multicolumn{2}{test} ↩\\end{tabular}' + '\\begin{tabular}{l l l}test & test & test ↩\\multicolumn{2}{l}{test} & test ↩test & \\multicolumn{2}{r}{test} ↩\\end{tabular}' + ) + }) + + it('handles a pasted table with adjacent borders and merged cells', function () { + mountEditor() + + const data = [ + '', + '', + '', + '', + '
foo
foobar
foobar
', + ].join('\n') + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + '\\begin{tabular}{| l | l |}\\hline\\multicolumn{2}{l}{foo} ↩\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hline\\end{tabular}' ) }) @@ -140,12 +209,11 @@ describe(' paste HTML in Visual mode', function () { const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should( 'have.text', - 'A table\\begin{tabular}{c c}foo & bar ↩\\end{tabular}' + 'A table\\begin{tabular}{l l}foo & bar ↩\\end{tabular}' ) }) @@ -156,7 +224,6 @@ describe(' paste HTML in Visual mode', function () { const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) cy.get('@content').should('have.text', '{foo}') @@ -166,86 +233,113 @@ describe(' paste HTML in Visual mode', function () { it('handles a pasted code block', function () { mountEditor() - const data = '
foo
' + const data = 'test
foo
test' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) - cy.get('@content').should('have.text', 'foo') + cy.get('@content').should('have.text', 'test foo test') cy.get('.ol-cm-environment-verbatim').should('have.length', 5) cy.get('.cm-line').eq(2).click() cy.get('@content').should( 'have.text', - '\\begin{verbatim}foo\\end{verbatim}' + 'test \\begin{verbatim}foo\\end{verbatim} test' ) }) it('handles pasted inline code', function () { mountEditor() - const data = 'foo' + const data = 'test foo test' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) - cy.get('@content').should('have.text', '\\verb|foo|') - cy.get('.ol-cm-command-verb').should('have.length', 1) + cy.get('@content').should('have.text', 'test foo test') + cy.get('.ol-cm-command-verb') + .should('have.length', 1) + .should('have.text', 'foo') + }) + + it('use text/plain for a wrapper code element', function () { + mountEditor() + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', 'foo') + clipboardData.setData('text/plain', 'foo') + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should('have.text', 'foo') + cy.get('.ol-cm-command-verb').should('have.length', 0) + }) + + it('use text/plain for a code element in a pre element', function () { + mountEditor() + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', '
foo
') + clipboardData.setData('text/plain', 'foo') + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should('have.text', 'foo') + cy.get('.ol-cm-command-verb').should('have.length', 0) + cy.get('.ol-cm-environment-verbatim').should('have.length', 0) }) it('handles pasted text with formatting', function () { mountEditor() - const data = 'footh bar2 baz' + const data = + 'footh bar2 baz woo woo woo' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) - cy.get('@content').should('have.text', 'footh bar2 baz') - cy.get('.ol-cm-command-textbf').should('have.length', 1) - cy.get('.ol-cm-command-textit').should('have.length', 1) + cy.get('@content').should('have.text', 'footh bar2 baz woo woo woo') + cy.get('.ol-cm-command-textbf').should('have.length', 2) + cy.get('.ol-cm-command-textit').should('have.length', 2) cy.get('.ol-cm-command-textsuperscript').should('have.length', 1) cy.get('.ol-cm-command-textsubscript').should('have.length', 1) }) - it('protects special characters', function () { + it('removes a non-breaking space when a text node contains no other content', function () { mountEditor() - const data = 'foo & bar~baz' + const data = 'foo\xa0bar' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) - cy.get('@content').should('have.text', 'foo & bar~baz') - cy.get('.ol-cm-character').should('have.length', 2) + cy.get('@content').should('have.text', 'foo bar') }) - it('does not protect special characters in code blocks', function () { + it('does not remove a non-breaking space when a text node contains other content', function () { mountEditor() - const data = 'foo & bar~baz \\textbf{foo}' + const data = 'foo\xa0bar' const clipboardData = new DataTransfer() clipboardData.setData('text/html', data) - cy.spy(clipboardData, 'getData').as('get-data') cy.get('@content').trigger('paste', { clipboardData }) - cy.get('@content').should( - 'have.text', - 'foo & bar~baz \\verb|\\textbf{foo}|' - ) + cy.get('@content').should('have.text', 'foo bar') + }) - cy.get('.cm-line').eq(0).type('{Enter}') - cy.get('@content').should('have.text', 'foo & bar~baz \\textbf{foo}') - cy.get('.ol-cm-character').should('have.length', 2) - cy.get('.ol-cm-command-verb').should('have.length', 1) + it('ignores HTML pasted from VS Code', function () { + mountEditor() + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', 'foo') + clipboardData.setData('text/plain', 'foo') + clipboardData.setData('application/vnd.code.copymetadata', 'test') + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should('have.text', 'foo') + cy.get('.ol-cm-command-textbf').should('have.length', 0) }) })