Merge pull request #14830 from overleaf/ae-office-paste

[visual] Improve handling of content pasted from desktop Office applications

GitOrigin-RevId: 10ce77bae008ad078afa22c8fac2cb48af3d9cb4
This commit is contained in:
Alf Eaton 2023-09-14 13:20:41 +01:00 committed by Copybot
parent ab47ac2c20
commit f6e8f3a2a3
2 changed files with 91 additions and 6 deletions

View file

@ -135,8 +135,13 @@ const htmlToLaTeX = (documentElement: HTMLElement) => {
return ''
}
// normalise multiple newlines
return text.replaceAll(/\n{2,}/g, '\n\n')
return (
text
// remove zero-width spaces (e.g. those added by PowerPoint)
.replaceAll('', '')
// normalise multiple newlines
.replaceAll(/\n{2,}/g, '\n\n')
)
}
const processWhitespace = (documentElement: HTMLElement) => {
@ -274,8 +279,34 @@ const processLists = (element: HTMLElement) => {
}
}
/**
 * Strips text nodes from a table that carry no cell content.
 *
 * Two passes are made:
 * 1. every text node that is a direct child of the table itself, or of a
 *    `thead`, `tbody` or `tr` inside it, is removed;
 * 2. a whitespace-only text node sitting at the very start or very end of a
 *    `th`/`td` cell is removed.
 *
 * @param table - the table element to clean up; it is mutated in place
 */
const removeNonContentTextNodes = (table: HTMLTableElement) => {
  // remove text nodes that are direct children of non-content table elements
  const containers = table.querySelectorAll('thead,tbody,tr')

  for (const element of [table, ...containers]) {
    // iterate over a snapshot: `childNodes` is a live list, so removing a node
    // while walking it directly would skip the node following the removed one
    for (const childNode of Array.from(element.childNodes)) {
      if (childNode.nodeType === Node.TEXT_NODE) {
        element.removeChild(childNode)
      }
    }
  }

  // remove whitespace-only text nodes at the start or end of table cells
  for (const element of table.querySelectorAll('th,td')) {
    // de-duplicate the two ends: when a cell has a single child, firstChild and
    // lastChild are the same node, and removing it a second time would throw
    const edgeNodes = new Set([element.firstChild, element.lastChild])

    for (const childNode of edgeNodes) {
      if (
        childNode?.nodeType === Node.TEXT_NODE &&
        childNode.textContent?.trim() === ''
      ) {
        element.removeChild(childNode)
      }
    }
  }
}
const processTables = (element: HTMLElement) => {
for (const table of element.querySelectorAll('table')) {
removeNonContentTextNodes(table)
// create a wrapper element for the table and the caption
const container = document.createElement('div')
container.className = 'ol-table-wrap'
@ -670,11 +701,13 @@ const selectors = [
selector: 'tr > td, tr > th',
start: (element: HTMLTableCellElement) => {
let output = ''
if (element.getAttribute('colspan')) {
const colspan = element.getAttribute('colspan')
if (colspan && Number(colspan) > 1) {
output += startMulticolumn(element)
}
// NOTE: multirow is nested inside multicolumn
if (element.getAttribute('rowspan')) {
const rowspan = element.getAttribute('rowspan')
if (rowspan && Number(rowspan) > 1) {
output += startMultirow(element)
}
return output
@ -682,10 +715,12 @@ const selectors = [
end: element => {
let output = ''
// NOTE: multirow is nested inside multicolumn
if (element.getAttribute('rowspan')) {
const rowspan = element.getAttribute('rowspan')
if (rowspan && Number(rowspan) > 1) {
output += '}'
}
if (element.getAttribute('colspan')) {
const colspan = element.getAttribute('colspan')
if (colspan && Number(colspan) > 1) {
output += '}'
}
const row = element.parentElement as HTMLTableRowElement

View file

@ -237,6 +237,26 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
)
})
// Pastes a table whose first cell carries explicit colspan="1" and rowspan="1"
// attributes and checks that the editor content is a plain tabular made of
// the cell text only, i.e. the span-of-one attributes add nothing to the output.
it('ignores rowspan="1" and colspan="1"', function () {
mountEditor()
// row one: two cells, the first with the 1x1 span attributes;
// row two: three plain cells
const data = [
`<table><tbody>`,
`<tr><td colspan="1" rowspan="1">test</td><td>test</td></tr>`,
`<tr><td>test</td><td>test</td><td>test</td></tr>`,
`</tbody></table>`,
].join('')
// deliver the markup as the text/html flavour of a synthetic paste event
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
// the expected text has three columns and no wrapper around the first cell
cy.get('@content').should(
'have.text',
'\\begin{tabular}{l l l}test & test ↩test & test & test ↩\\end{tabular}'
)
})
it('handles a pasted table with adjacent borders and merged cells', function () {
mountEditor()
@ -543,6 +563,18 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
cy.get('@content').should('have.text', 'foo bar')
})
// Pastes text containing a zero-width space and checks that the character is
// absent from the resulting editor content.
it('removes all zero-width spaces', function () {
mountEditor()
// \u200b (ZERO WIDTH SPACE) sits between "foo" and "bar"
const data = 'foo\u200bbar'
// deliver the string as the text/html flavour of a synthetic paste event
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
// the two halves must be joined with nothing in between
cy.get('@content').should('have.text', 'foobar')
})
it('ignores HTML pasted from VS Code', function () {
mountEditor()
@ -588,4 +620,22 @@ describe('<CodeMirrorEditor/> paste HTML in Visual mode', function () {
cy.get('.ol-cm-character').should('have.length', 2)
cy.get('.ol-cm-command-verb').should('have.length', 1)
})
// Pastes a table whose markup has line breaks between the table elements and
// around the cell content, then checks how many editor lines result.
it('tidies whitespace in pasted tables', function () {
mountEditor()
// the line breaks inside this template literal are part of the pasted markup
const data = `<table>
<tr>
<td>
<p><b>test</b></p>
</td>
</tr>
</table>`
// deliver the markup as the text/html flavour of a synthetic paste event
const clipboardData = new DataTransfer()
clipboardData.setData('text/html', data)
cy.get('@content').trigger('paste', { clipboardData })
// NOTE(review): 8 is presumably the line count of the generated table once the
// markup's whitespace has been discarded — confirm against the generated output
cy.get('.cm-line').should('have.length', 8)
})
})