[visual] Improved pasted HTML handling (#14384)

* Replace non-breaking spaces added by Chrome on copy
* Ignore text/html from VS Code
* Improve table border handling
* Remove unused cy.spy
* Map em to textit and strong to textbf
* Left-align table cells by default
* Remove "justify" mapping
* Detect border on table row
* Remove protection for special characters in pasted HTML
* If the only pasted HTML content is in a code block, use the plain text version
* Enable paste-html feature in Storybook
* Improve table handling

GitOrigin-RevId: a912aa1fc659089451103e63c5d0fd3ae2a30627
This commit is contained in:
Alf Eaton 2023-08-29 11:03:54 +01:00 committed by Copybot
parent d5a3c4c303
commit cf319b61b4
3 changed files with 293 additions and 112 deletions

View file

@ -20,6 +20,14 @@ export const pasteHtml = Prec.highest(
return false
}
// ignore text/html from VS Code
if (
clipboardData.types.includes('application/vnd.code.copymetadata') ||
clipboardData.types.includes('vscode-editor-data')
) {
return false
}
const html = clipboardData.getData('text/html').trim()
if (html.length === 0) {
@ -28,7 +36,15 @@ export const pasteHtml = Prec.highest(
// convert the HTML to LaTeX
try {
const latex = htmlToLaTeX(html)
const parser = new DOMParser()
const { documentElement } = parser.parseFromString(html, 'text/html')
// if the only content is in a code block, use the plain text version
if (onlyCode(documentElement)) {
return false
}
const latex = htmlToLaTeX(documentElement)
view.dispatch(
view.state.changeByRange(range => {
@ -59,19 +75,22 @@ const removeUnwantedElements = (
}
}
const htmlToLaTeX = (html: string) => {
const parser = new DOMParser()
const { documentElement } = parser.parseFromString(html, 'text/html')
// return true if the text content of the first <code> element
// is the same as the text content of the whole document element
const onlyCode = (documentElement: HTMLElement) =>
documentElement.querySelector('code')?.textContent?.trim() ===
documentElement.textContent?.trim()
const htmlToLaTeX = (documentElement: HTMLElement) => {
// remove style elements
removeUnwantedElements(documentElement, 'style')
// replace non-breaking spaces added by Chrome on copy
processWhitespace(documentElement)
// pre-process table elements
processTables(documentElement)
// protect special characters in non-LaTeX text nodes
protectSpecialCharacters(documentElement)
processMatchedElements(documentElement)
const text = documentElement.textContent
@ -84,36 +103,15 @@ const htmlToLaTeX = (html: string) => {
return text.replaceAll(/\n{2,}/g, '\n\n')
}
const protectSpecialCharacters = (documentElement: HTMLElement) => {
for (const element of documentElement.childNodes) {
const text = element.textContent
if (text) {
// if there are no code blocks, use backslash as an indicator of LaTeX code that shouldn't be protected
if (
element instanceof HTMLElement &&
!element.querySelector('code') &&
text.includes('\\')
) {
continue
}
const processWhitespace = (documentElement: HTMLElement) => {
const walker = document.createTreeWalker(
documentElement,
NodeFilter.SHOW_TEXT
)
const walker = document.createTreeWalker(element, NodeFilter.SHOW_TEXT)
for (let node = walker.nextNode(); node; node = walker.nextNode()) {
const text = node.textContent
if (text) {
// leave text that's in a code block
if (node.parentElement?.closest('code')) {
continue
}
// replace non-backslash-prefixed characters
node.textContent = text.replaceAll(
/(^|[^\\])([#$%&~_^\\{}])/g,
(_match, prefix: string, char: string) => `${prefix}\\${char}`
)
}
}
for (let node = walker.nextNode(); node; node = walker.nextNode()) {
if (node.textContent === ' ') {
node.textContent = ' '
}
}
}
@ -180,40 +178,65 @@ const processTables = (element: HTMLElement) => {
}
}
const cellAlignment = new Map([
['left', 'l'],
['center', 'c'],
['right', 'r'],
])
const tabular = (element: HTMLTableElement) => {
const options = []
const definitions: Array<{
alignment: string
borderLeft: boolean
borderRight: boolean
}> = []
// NOTE: only analysing cells in the first row
const row = element.querySelector('tr')
if (row) {
// TODO: look for horizontal borders and insert \hline (or \toprule, \midrule, \bottomrule etc)?
const rows = element.querySelectorAll('tr')
for (const row of rows) {
const cells = [...row.childNodes].filter(
element => element.nodeName === 'TD' || element.nodeName === 'TH'
) as Array<HTMLTableCellElement | HTMLTableHeaderCellElement>
) as Array<HTMLTableCellElement>
let index = 0
for (const cell of cells) {
const { borderLeft, textAlign, borderRight } = cell.style
// NOTE: reading the alignment and borders from the first cell definition in each column
if (definitions[index] === undefined) {
const { textAlign, borderLeftStyle, borderRightStyle } = cell.style
if (borderLeft && borderLeft !== 'none') {
// avoid duplicating when both left and right borders are defined
if (options.length === 0 || options[options.length - 1] !== '|') {
options.push('|')
definitions[index] = {
alignment: textAlign,
borderLeft: visibleBorderStyle(borderLeftStyle),
borderRight: visibleBorderStyle(borderRightStyle),
}
}
options.push(
textAlign === 'left' ? 'l' : textAlign === 'right' ? 'r' : 'c'
)
if (borderRight && borderRight !== 'none') {
options.push('|')
}
index += Number(cell.getAttribute('colspan') ?? 1)
}
}
return options.join(' ')
for (let index = 0; index <= definitions.length; index++) {
// fill in missing definitions
const item = definitions[index] || {
alignment: 'left',
borderLeft: false,
borderRight: false,
}
// remove left border if previous column had a right border
if (item.borderLeft && index > 0 && definitions[index - 1]?.borderRight) {
item.borderLeft = false
}
}
return definitions
.flatMap(definition => [
definition.borderLeft ? '|' : '',
cellAlignment.get(definition.alignment) ?? 'l',
definition.borderRight ? '|' : '',
])
.filter(Boolean)
.join(' ')
}
const listDepth = (
@ -255,7 +278,50 @@ const hasContent = (element: HTMLElement): boolean => {
return Boolean(element.textContent && element.textContent.trim().length > 0)
}
export const selectors = [
type BorderStyle =
| 'borderTopStyle'
| 'borderRightStyle'
| 'borderBottomStyle'
| 'borderLeftStyle'
const visibleBorderStyle = (style: CSSStyleDeclaration[BorderStyle]): boolean =>
!!style && style !== 'none' && style !== 'hidden'
const rowHasBorderStyle = (
element: HTMLTableRowElement,
style: BorderStyle
): boolean => {
if (visibleBorderStyle(element.style[style])) {
return true
}
const cells = element.querySelectorAll<HTMLTableCellElement>('th,td')
return [...cells].every(cell => visibleBorderStyle(cell.style[style]))
}
const isTableRowElement = (
element: Element | null
): element is HTMLTableRowElement => element?.tagName === 'TR'
const nextRowHasBorderStyle = (
element: HTMLTableRowElement,
style: BorderStyle
) => {
const { nextElementSibling } = element
return (
isTableRowElement(nextElementSibling) &&
rowHasBorderStyle(nextElementSibling, style)
)
}
const startMulticolumn = (element: HTMLTableCellElement): string => {
const colspan = element.getAttribute('colspan') ?? 1
const alignment = cellAlignment.get(element.style.textAlign) ?? 'l'
return `\\multicolumn{${Number(colspan)}}{${alignment}}{`
}
const selectors = [
createSelector({
selector: 'b',
match: element =>
@ -273,6 +339,12 @@ export const selectors = [
end: () => '}',
inside: true,
}),
createSelector({
selector: 'strong',
match: element => hasContent(element),
start: () => '\\textbf{',
end: () => '}',
}),
createSelector({
selector: 'i',
match: element =>
@ -287,6 +359,12 @@ export const selectors = [
start: () => '\\textit{',
end: () => '}',
}),
createSelector({
selector: 'em',
match: element => hasContent(element),
start: () => '\\textit{',
end: () => '}',
}),
createSelector({
selector: 'sup',
match: element => hasContent(element),
@ -370,13 +448,13 @@ export const selectors = [
}),
createSelector({
selector: '.ol-table-wrap',
start: element => `\n\n\\begin{table}\n\\centering\n`,
start: () => `\n\n\\begin{table}\n\\centering\n`,
end: () => `\n\\end{table}\n\n`,
}),
createSelector({
selector: 'table',
start: element => `\n\\begin{tabular}{${tabular(element)}}\n`,
end: () => `\n\\end{tabular}\n`,
start: element => `\n\\begin{tabular}{${tabular(element)}}`,
end: () => `\\end{tabular}\n`,
}),
createSelector({
selector: 'thead',
@ -395,14 +473,22 @@ export const selectors = [
}),
createSelector({
selector: 'tr',
match: element => element.nextElementSibling?.nodeName === 'TR',
end: () => `\n`,
start: element => {
const borderTop = rowHasBorderStyle(element, 'borderTopStyle')
return borderTop ? '\\hline\n' : ''
},
end: element => {
const borderBottom = rowHasBorderStyle(element, 'borderBottomStyle')
return borderBottom && !nextRowHasBorderStyle(element, 'borderTopStyle')
? '\n\\hline\n'
: '\n'
},
}),
createSelector({
selector: 'tr > td:not(:last-child), tr > th:not(:last-child)',
start: element => {
start: (element: HTMLTableCellElement) => {
const colspan = element.getAttribute('colspan')
return colspan ? `\\multicolumn{${Number(colspan)}}{` : ''
return colspan ? startMulticolumn(element) : ''
},
end: element => {
const colspan = element.getAttribute('colspan')
@ -411,13 +497,13 @@ export const selectors = [
}),
createSelector({
selector: 'tr > td:last-child, tr > th:last-child',
start: element => {
start: (element: HTMLTableCellElement) => {
const colspan = element.getAttribute('colspan')
return colspan ? `\\multicolumn{${Number(colspan)}}{` : ''
return colspan ? startMulticolumn(element) : ''
},
end: element => {
const colspan = element.getAttribute('colspan')
return colspan ? `} \\\\` : ` \\\\`
return colspan ? `} \\\\` : ` \\\\`
},
}),
createSelector({

View file

@ -99,6 +99,7 @@ export const Visual = (args: any, { globals: { theme } }: any) => {
'ol-mathJax3Path': 'https://unpkg.com/mathjax@3.2.2/es5/tex-svg-full.js',
'ol-splitTestVariants': {
'figure-modal': 'enabled',
'paste-html': 'enabled',
'table-generator': 'enabled',
},
})

View file

@ -55,7 +55,6 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', ' foo bar')
@ -69,14 +68,13 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', ' foo bar')
cy.get('.ol-cm-item').should('have.length', 2)
})
it('handles a pasted simple table', function () {
it('handles a pasted table', function () {
mountEditor()
const data =
@ -84,29 +82,80 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{c c}foo & bar ↩\\end{tabular}'
'\\begin{tabular}{l l}foo & bar ↩\\end{tabular}'
)
})
it('handles a pasted simple table with borders', function () {
it('handles a pasted table with cell borders', function () {
mountEditor()
const data =
'<table><tbody><tr><td style="border-left:1px solid black;border-right:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black">bar</td></tr></tbody></table>'
'<table><tbody><tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr></tbody></table>'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{| c | c |}foo & bar ↩\\end{tabular}'
'\\begin{tabular}{| l | l |}\\hlinefoo & bar ↩\\hline\\end{tabular}'
)
})
it('handles a pasted table with row borders', function () {
mountEditor()
const data =
'<table><tbody><tr style="border-top:1px solid black;border-bottom:1px solid black"><td>foo</td><td>bar</td></tr></tbody></table>'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{l l}\\hlinefoo & bar ↩\\hline\\end{tabular}'
)
})
it('handles a pasted table with adjacent borders', function () {
mountEditor()
const data = [
'<table><tbody>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr>',
'</tbody></table>',
].join('\n')
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{| l | l |}\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hline\\end{tabular}'
)
})
it('handles a pasted table with alignment', function () {
mountEditor()
const data =
'<table><tbody><tr><td>foo</td><td style="text-align:left">foo</td><td style="text-align:center">foo</td><td style="text-align:right">foo</td><td style="text-align:justify">foo</td></tr></tbody></table>'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{l l c r l}foo & foo & foo & foo & foo ↩\\end{tabular}'
)
})
@ -117,18 +166,38 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
`<table><tbody>`,
`<tr><td>test</td><td>test</td><td>test</td></tr>`,
`<tr><td colspan="2">test</td><td>test</td></tr>`,
`<tr><td>test</td><td colspan="2">test</td></tr>`,
`<tr><td>test</td><td colspan="2" style="text-align:right">test</td></tr>`,
`</tbody></table>`,
].join('')
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{c c c}test & test & test ↩\\multicolumn{2}{test} & test ↩test & \\multicolumn{2}{test} ↩\\end{tabular}'
'\\begin{tabular}{l l l}test & test & test ↩\\multicolumn{2}{l}{test} & test ↩test & \\multicolumn{2}{r}{test} ↩\\end{tabular}'
)
})
it('handles a pasted table with adjacent borders and merged cells', function () {
mountEditor()
const data = [
'<table><tbody>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black" colspan="2">foo</td></tr>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr>',
'<tr><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">foo</td><td style="border-left:1px solid black;border-right:1px solid black;border-top:1px solid black;border-bottom:1px solid black">bar</td></tr>',
'</tbody></table>',
].join('\n')
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'\\begin{tabular}{| l | l |}\\hline\\multicolumn{2}{l}{foo} ↩\\hlinefoo & bar ↩\\hlinefoo & bar ↩\\hline\\end{tabular}'
)
})
@ -140,12 +209,11 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'A table\\begin{tabular}{c c}foo & bar ↩\\end{tabular}'
'A table\\begin{tabular}{l l}foo & bar ↩\\end{tabular}'
)
})
@ -156,7 +224,6 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', '{foo}')
@ -166,86 +233,113 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
it('handles a pasted code block', function () {
mountEditor()
const data = '<pre><code>foo</a></pre>'
const data = 'test <pre><code>foo</code></pre> test'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'foo')
cy.get('@content').should('have.text', 'test foo test')
cy.get('.ol-cm-environment-verbatim').should('have.length', 5)
cy.get('.cm-line').eq(2).click()
cy.get('@content').should(
'have.text',
'\\begin{verbatim}foo\\end{verbatim}'
'test \\begin{verbatim}foo\\end{verbatim} test'
)
})
it('handles pasted inline code', function () {
mountEditor()
const data = '<code>foo</a>'
const data = 'test <code>foo</code> test'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', '\\verb|foo|')
cy.get('.ol-cm-command-verb').should('have.length', 1)
cy.get('@content').should('have.text', 'test foo test')
cy.get('.ol-cm-command-verb')
.should('have.length', 1)
.should('have.text', 'foo')
})
it('use text/plain for a wrapper code element', function () {
mountEditor()
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', '<code>foo</code>')
clipboardData.setData('text/plain', 'foo')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'foo')
cy.get('.ol-cm-command-verb').should('have.length', 0)
})
it('use text/plain for a code element in a pre element', function () {
mountEditor()
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', '<pre><code>foo</code></pre>')
clipboardData.setData('text/plain', 'foo')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'foo')
cy.get('.ol-cm-command-verb').should('have.length', 0)
cy.get('.ol-cm-environment-verbatim').should('have.length', 0)
})
it('handles pasted text with formatting', function () {
mountEditor()
const data = '<b>foo</b><sup>th</sup> <i>bar</i><sub>2</sub> baz'
const data =
'<b>foo</b><sup>th</sup> <i>bar</i><sub>2</sub> baz <em>woo</em> <strong>woo</strong> woo'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'footh bar2 baz')
cy.get('.ol-cm-command-textbf').should('have.length', 1)
cy.get('.ol-cm-command-textit').should('have.length', 1)
cy.get('@content').should('have.text', 'footh bar2 baz woo woo woo')
cy.get('.ol-cm-command-textbf').should('have.length', 2)
cy.get('.ol-cm-command-textit').should('have.length', 2)
cy.get('.ol-cm-command-textsuperscript').should('have.length', 1)
cy.get('.ol-cm-command-textsubscript').should('have.length', 1)
})
it('protects special characters', function () {
it('removes a non-breaking space when a text node contains no other content', function () {
mountEditor()
const data = 'foo & bar~baz'
const data = 'foo<span>\xa0</span>bar'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'foo & bar~baz')
cy.get('.ol-cm-character').should('have.length', 2)
cy.get('@content').should('have.text', 'foo bar')
})
it('does not protect special characters in code blocks', function () {
it('does not remove a non-breaking space when a text node contains other content', function () {
mountEditor()
const data = 'foo & bar~baz <code>\\textbf{foo}</code>'
const data = 'foo\xa0bar'
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.spy(clipboardData, 'getData').as('get-data')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should(
'have.text',
'foo & bar~baz \\verb|\\textbf{foo}|'
)
cy.get('@content').should('have.text', 'foo bar')
})
cy.get('.cm-line').eq(0).type('{Enter}')
cy.get('@content').should('have.text', 'foo & bar~baz \\textbf{foo}')
cy.get('.ol-cm-character').should('have.length', 2)
cy.get('.ol-cm-command-verb').should('have.length', 1)
it('ignores HTML pasted from VS Code', function () {
mountEditor()
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', '<b>foo</b>')
clipboardData.setData('text/plain', 'foo')
clipboardData.setData('application/vnd.code.copymetadata', 'test')
cy.get('@content').trigger('paste', { clipboardData })
cy.get('@content').should('have.text', 'foo')
cy.get('.ol-cm-command-textbf').should('have.length', 0)
})
})