From f6e8f3a2a319468fd08572d767a3cf8d953f0be2 Mon Sep 17 00:00:00 2001 From: Alf Eaton Date: Thu, 14 Sep 2023 13:20:41 +0100 Subject: [PATCH] Merge pull request #14830 from overleaf/ae-office-paste [visual] Improve handling of content pasted from desktop Office applications GitOrigin-RevId: 10ce77bae008ad078afa22c8fac2cb48af3d9cb4 --- .../extensions/visual/paste-html.ts | 47 ++++++++++++++--- ...demirror-editor-visual-paste-html.spec.tsx | 50 +++++++++++++++++++ 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts index e88a583290..ac75d867e4 100644 --- a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts +++ b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts @@ -135,8 +135,13 @@ const htmlToLaTeX = (documentElement: HTMLElement) => { return '' } - // normalise multiple newlines - return text.replaceAll(/\n{2,}/g, '\n\n') + return ( + text + // remove zero-width spaces (e.g. 
those added by Powerpoint) + .replaceAll('​', '') + // normalise multiple newlines + .replaceAll(/\n{2,}/g, '\n\n') + ) } const processWhitespace = (documentElement: HTMLElement) => { @@ -274,8 +279,34 @@ const processLists = (element: HTMLElement) => { } } +const removeNonContentTextNodes = (table: HTMLTableElement) => { + // remove text nodes that are direct children of non-content table elements (iterate over a snapshot: childNodes is a live list, so removing from it while iterating would skip siblings) + const containers = table.querySelectorAll('thead,tbody,tr') + for (const element of [table, ...containers]) { + for (const childNode of Array.from(element.childNodes)) { + if (childNode.nodeType === Node.TEXT_NODE) { + element.removeChild(childNode) + } + } + } + + // remove whitespace-only text nodes at the start or end of table cells (remove() is a no-op for an already-detached node, e.g. when firstChild and lastChild are the same node) + for (const element of table.querySelectorAll('th,td')) { + for (const childNode of [element.firstChild, element.lastChild]) { + if ( + childNode?.nodeType === Node.TEXT_NODE && + childNode.textContent?.trim() === '' + ) { + childNode.remove() + } + } + } +} + const processTables = (element: HTMLElement) => { for (const table of element.querySelectorAll('table')) { + removeNonContentTextNodes(table) + // create a wrapper element for the table and the caption const container = document.createElement('div') container.className = 'ol-table-wrap' @@ -670,11 +701,13 @@ const selectors = [ selector: 'tr > td, tr > th', start: (element: HTMLTableCellElement) => { let output = '' - if (element.getAttribute('colspan')) { + const colspan = element.getAttribute('colspan') + if (colspan && Number(colspan) > 1) { output += startMulticolumn(element) } // NOTE: multirow is nested inside multicolumn - if (element.getAttribute('rowspan')) { + const rowspan = element.getAttribute('rowspan') + if (rowspan && Number(rowspan) > 1) { output += startMultirow(element) } return output @@ -682,10 +715,12 @@ const selectors = [ end: element => { let output = '' // NOTE: multirow is nested inside multicolumn - if (element.getAttribute('rowspan')) { + const rowspan 
= element.getAttribute('rowspan') + if (rowspan && Number(rowspan) > 1) { output += '}' } - if (element.getAttribute('colspan')) { + const colspan = element.getAttribute('colspan') + if (colspan && Number(colspan) > 1) { output += '}' } const row = element.parentElement as HTMLTableRowElement diff --git a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx index 540b59ee4b..bf9c32afe3 100644 --- a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx +++ b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx @@ -237,6 +237,26 @@ describe(' paste HTML in Visual mode', function () { ) }) + it('ignores rowspan="1" and colspan="1"', function () { + mountEditor() + + const data = [ + ``, + ``, + ``, + `
testtest
testtesttest
`, + ].join('') + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + '\\begin{tabular}{l l l}test & test ↩test & test & test ↩\\end{tabular}' + ) + }) + it('handles a pasted table with adjacent borders and merged cells', function () { mountEditor() @@ -543,6 +563,18 @@ describe(' paste HTML in Visual mode', function () { cy.get('@content').should('have.text', 'foo bar') }) + it('removes all zero-width spaces', function () { + mountEditor() + + const data = 'foo\u200bbar' + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should('have.text', 'foobar') + }) + it('ignores HTML pasted from VS Code', function () { mountEditor() @@ -588,4 +620,22 @@ describe(' paste HTML in Visual mode', function () { cy.get('.ol-cm-character').should('have.length', 2) cy.get('.ol-cm-command-verb').should('have.length', 1) }) + + it('tidies whitespace in pasted tables', function () { + mountEditor() + + const data = ` + + + +
+

test

+
` + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('.cm-line').should('have.length', 8) + }) })