Protect special characters in pasted HTML (#14476)

GitOrigin-RevId: 7288f6696ed9af78c1ea4cc94d0e8022da90aacf
This commit is contained in:
Alf Eaton 2023-09-01 12:09:11 +01:00 committed by Copybot
parent a0347c53f0
commit d92dc66e30
2 changed files with 79 additions and 0 deletions

View file

@ -99,6 +99,9 @@ const htmlToLaTeX = (documentElement: HTMLElement) => {
// pre-process table elements
processTables(documentElement)
// protect special characters in non-LaTeX text nodes
protectSpecialCharacters(documentElement)
processMatchedElements(documentElement)
const text = documentElement.textContent
@ -124,6 +127,49 @@ const processWhitespace = (documentElement: HTMLElement) => {
}
}
/**
 * Type guard that narrows a DOM `Node` to `HTMLElement` by checking that its
 * `nodeType` is `Node.ELEMENT_NODE`.
 */
const isElementNode = (node: Node): node is HTMLElement => {
  const { nodeType } = node
  return nodeType === Node.ELEMENT_NODE
}
// Matches a run of one or more LaTeX special characters whose first character
// is not preceded by a backslash. The whole run is captured (note the `+`):
// matching one character at a time would consume the first character of a
// pair such as `$$` or `{}` and leave nothing to act as the "prefix" for the
// second one, which would then stay unprotected.
// TODO: negative lookbehind once Safari supports it
const specialCharacterRegExp = /(^|[^\\])([#$%&~_^\\{}]+)/g

/**
 * Replacer for `specialCharacterRegExp`.
 *
 * Each character of the captured run is prefixed with a backslash, except
 * that:
 * - a backslash becomes `\textbackslash{}`;
 * - a character immediately preceded by a backslash in the input is left
 *   as it is (same rule the pattern applies to the start of the run).
 *
 * @param _match the full match (unused)
 * @param prefix the character before the run, or an empty string at the start
 *   of the text; it is emitted unchanged
 * @param char the run of special characters to protect (a single character
 *   is handled exactly as a run of length one)
 * @returns the prefix followed by the protected run
 */
const specialCharacterReplacer = (
  _match: string,
  prefix: string,
  char: string
) => {
  let output = prefix
  let previous = prefix

  for (const current of char) {
    if (previous === '\\') {
      // already backslash-prefixed in the input: leave untouched
      output += current
    } else if (current === '\\') {
      // convert `\` to `\textbackslash{}`, preserving subsequent whitespace
      output += '\\textbackslash{}'
    } else {
      output += `\\${current}`
    }
    previous = current
  }

  return output
}
/**
 * Walks the element and text nodes under `documentElement` and rewrites the
 * content of every non-empty text node through `specialCharacterRegExp` /
 * `specialCharacterReplacer`. `<code>` elements are rejected by the walker's
 * filter, so they and their descendants are not visited.
 *
 * The text nodes are modified in place; nothing is returned.
 */
const protectSpecialCharacters = (documentElement: HTMLElement) => {
  // FILTER_REJECT (rather than FILTER_SKIP) also excludes the subtree
  const rejectCodeElements = (candidate: Node) => {
    if (isElementNode(candidate) && candidate.tagName === 'CODE') {
      return NodeFilter.FILTER_REJECT
    }
    return NodeFilter.FILTER_ACCEPT
  }

  const treeWalker = document.createTreeWalker(
    documentElement,
    NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT,
    rejectCodeElements
  )

  let current = treeWalker.nextNode()
  while (current) {
    if (current.nodeType === Node.TEXT_NODE) {
      const content = current.textContent
      if (content) {
        // replace non-backslash-prefixed characters
        current.textContent = content.replaceAll(
          specialCharacterRegExp,
          specialCharacterReplacer
        )
      }
    }
    current = treeWalker.nextNode()
  }
}
const processMatchedElements = (documentElement: HTMLElement) => {
for (const item of selectors) {
for (const element of documentElement.querySelectorAll<any>(

View file

@ -363,4 +363,37 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
cy.get('@content').should('have.text', 'foo')
cy.get('.ol-cm-command-textbf').should('have.length', 0)
})
it('protects special characters', function () {
  mountEditor()

  // paste an HTML fragment containing two special characters (`&` and `~`)
  const html = 'foo & bar~baz'
  const transfer = new DataTransfer()
  transfer.setData('text/html', html)
  cy.get('@content').trigger('paste', { clipboardData: transfer })

  // the displayed text is unchanged and two `.ol-cm-character` elements exist
  cy.get('@content').should('have.text', 'foo & bar~baz')
  cy.get('.ol-cm-character').should('have.length', 2)
})
it('does not protect special characters in code blocks', function () {
  mountEditor()

  // paste text with two special characters followed by a <code> element
  const html = 'foo & bar~baz <code>\\textbf{foo}</code>'
  const transfer = new DataTransfer()
  transfer.setData('text/html', html)
  cy.get('@content').trigger('paste', { clipboardData: transfer })

  // the <code> content is expected as a \verb command with its text intact
  cy.get('@content').should(
    'have.text',
    'foo & bar~baz \\verb|\\textbf{foo}|'
  )

  // after typing Enter on the first line, the expected text no longer
  // includes the `\verb|…|` wrapper
  cy.get('.cm-line').eq(0).type('{Enter}')
  cy.get('@content').should('have.text', 'foo & bar~baz \\textbf{foo}')
  cy.get('.ol-cm-character').should('have.length', 2)
  cy.get('.ol-cm-command-verb').should('have.length', 1)
})
})