From 3ab54b5b14fccb38f77f3986a36d684b854be32f Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Tue, 26 Mar 2024 07:28:14 -0400 Subject: [PATCH] Merge pull request #17368 from overleaf/em-handle-metadata Handle tracked changes in project-history GitOrigin-RevId: 9c790a4dcd874f0d68173fc65cb6823a4da55cc6 --- .../document-updater/app/js/RangesManager.js | 51 ++- .../document-updater/app/js/UpdateManager.js | 9 +- services/document-updater/app/js/types.ts | 5 +- .../js/RangesManager/RangesManagerTests.js | 72 +++- .../js/UpdateManager/UpdateManagerTests.js | 84 +++- .../app/js/UpdateCompressor.js | 212 ++++++++-- .../app/js/UpdateTranslator.js | 164 ++++++-- services/project-history/app/js/types.ts | 31 +- .../UpdateCompressor/UpdateCompressorTests.js | 398 ++++++++++++++++++ .../UpdateTranslator/UpdateTranslatorTests.js | 271 ++++++++++++ 10 files changed, 1189 insertions(+), 108 deletions(-) diff --git a/services/document-updater/app/js/RangesManager.js b/services/document-updater/app/js/RangesManager.js index c9d0367f71..3ca3d3a780 100644 --- a/services/document-updater/app/js/RangesManager.js +++ b/services/document-updater/app/js/RangesManager.js @@ -13,6 +13,7 @@ const { isInsert, isDelete, isComment } = require('./Utils') * @typedef {import('./types').DeleteOp} DeleteOp * @typedef {import('./types').HistoryCommentOp} HistoryCommentOp * @typedef {import('./types').HistoryDeleteOp} HistoryDeleteOp + * @typedef {import('./types').HistoryDeleteTrackedChange} HistoryDeleteTrackedChange * @typedef {import('./types').HistoryInsertOp} HistoryInsertOp * @typedef {import('./types').HistoryOp} HistoryOp * @typedef {import('./types').HistoryUpdate} HistoryUpdate @@ -277,22 +278,44 @@ function getHistoryOpForInsert(op, comments, changes) { */ function getHistoryOpForDelete(op, changes, opts = {}) { let hpos = op.p - const hsplits = [] + const opEnd = op.p + op.d.length + /** @type HistoryDeleteTrackedChange[] */ + const changesInsideDelete = [] for (const change of changes) { - if (!isDelete(change.op)) { - // We're only interested in tracked deletes - continue - } - if (change.op.p <= op.p) { - // Tracked delete is before or at the position of the incoming delete. - // Move the op forward. - hpos += change.op.d.length + if (isDelete(change.op)) { + // Tracked delete is before or at the position of the incoming delete. + // Move the op forward. + hpos += change.op.d.length + } else if (isInsert(change.op)) { + const changeEnd = change.op.p + change.op.i.length + const endPos = Math.min(changeEnd, opEnd) + if (endPos > op.p) { + // Part of the tracked insert is inside the delete + changesInsideDelete.push({ + type: 'insert', + offset: 0, + length: endPos - op.p, + }) + } + } } else if (change.op.p < op.p + op.d.length) { - // Tracked delete inside the deleted text. Record a split for the history system. - hsplits.push({ offset: change.op.p - op.p, length: change.op.d.length }) + // Tracked change inside the deleted text. Record it for the history system. + if (isDelete(change.op)) { + changesInsideDelete.push({ + type: 'delete', + offset: change.op.p - op.p, + length: change.op.d.length, + }) + } else if (isInsert(change.op)) { + changesInsideDelete.push({ + type: 'insert', + offset: change.op.p - op.p, + length: Math.min(change.op.i.length, opEnd - change.op.p), + }) + } } else { - // We've seen all tracked deletes before or inside the delete + // We've seen all tracked changes before or inside the delete break } } @@ -302,8 +325,8 @@ function getHistoryOpForDelete(op, changes, opts = {}) { if (hpos !== op.p) { historyOp.hpos = hpos } - if (hsplits.length > 0) { - historyOp.hsplits = hsplits + if (changesInsideDelete.length > 0) { + historyOp.trackedChanges = changesInsideDelete } return historyOp } diff --git a/services/document-updater/app/js/UpdateManager.js b/services/document-updater/app/js/UpdateManager.js index fc71e74b63..dd4d2db728 100644 --- a/services/document-updater/app/js/UpdateManager.js +++ b/services/document-updater/app/js/UpdateManager.js @@ -162,7 +162,7 @@ const UpdateManager = { ) profile.log('RedisManager.updateDocument') - UpdateManager._addMetadataToHistoryUpdates( + UpdateManager._adjustHistoryUpdatesMetadata( historyUpdates, pathname, projectHistoryId, @@ -311,7 +311,7 @@ const UpdateManager = { * @param {Ranges} ranges * @param {boolean} historyRangesSupport */ - _addMetadataToHistoryUpdates( + _adjustHistoryUpdatesMetadata( updates, pathname, projectHistoryId, @@ -368,6 +368,11 @@ const UpdateManager = { } } } + + if (!historyRangesSupport) { + // Prevent project-history from processing tracked changes + delete update.meta.tc + } } }, } diff --git a/services/document-updater/app/js/types.ts b/services/document-updater/app/js/types.ts index 937803eed3..6d451f4205 100644 --- a/services/document-updater/app/js/types.ts +++ b/services/document-updater/app/js/types.ts @@ -62,10 +62,11 @@ export type HistoryInsertOp = InsertOp & { export type HistoryDeleteOp = DeleteOp & { hpos?: number - hsplits?: HistoryDeleteSplit[] + trackedChanges?: HistoryDeleteTrackedChange[] } -export type HistoryDeleteSplit = { +export type HistoryDeleteTrackedChange = { + type: 'insert' | 'delete' offset: number length: number } diff --git a/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js b/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js index 207d5b771d..d4e3ab9250 100644 --- a/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js +++ b/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js @@ -323,7 +323,7 @@ describe('RangesManager', function () { }) }) - describe('deletes among tracked deletes', function () { + describe('deletes over tracked changes', function () { beforeEach(function () { // original text is "on[1]e [22](three) f[333]ou[4444]r [55555]five" // [] denotes tracked deletes @@ -362,16 +362,78 @@ describe('RangesManager', function () { d: 'four ', p: 10, hpos: 13, - hsplits: [ - { offset: 1, length: 3 }, - { offset: 3, length: 4 }, + trackedChanges: [ + { type: 'delete', offset: 1, length: 3 }, + { type: 'delete', offset: 3, length: 4 }, ], }, ], // the "three" delete is offset to the right by the two first tracked // deletes - [{ d: 'three ', p: 4, hpos: 7 }], + [ + { + d: 'three ', + p: 4, + hpos: 7, + trackedChanges: [{ type: 'insert', offset: 0, length: 5 }], + }, + ], + ]) + }) + }) + + describe('deletes that overlap tracked inserts', function () { + beforeEach(function () { + // original text is "(one) (three) (four) five" + // [] denotes tracked deletes + // () denotes tracked inserts + this.ranges = { + comments: [], + changes: makeRanges([ + { i: 'one', p: 0 }, + { i: 'three', p: 4 }, + { i: 'four', p: 10 }, + ]), + } + this.updates = makeUpdates( + [ + { d: 'ne th', p: 1 }, + { d: 'ou', p: 6 }, + ], + { tc: 'tracked-change-id' } + ) + this.newDocLines = ['oree fr five'] + this.result = this.RangesManager.applyUpdate( + this.project_id, + this.doc_id, + this.ranges, + this.updates, + this.newDocLines, + { historyRangesSupport: true } + ) + }) + + it('should split and offset deletes appropriately', function () { + expect(this.result.historyUpdates.map(x => x.op)).to.deep.equal([ + [ + { + d: 'ne th', + p: 1, + trackedChanges: [ + { type: 'insert', offset: 0, length: 2 }, + { type: 'insert', offset: 3, length: 2 }, + ], + }, + ], + [ + { + d: 'ou', + p: 6, + hpos: 7, + trackedChanges: [{ type: 'insert', offset: 0, length: 2 }], + }, + ], ]) }) }) diff --git a/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js b/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js index 20dd9a817d..bd5a46a81b 100644 --- a/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js +++ b/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js @@ -344,7 +344,7 @@ describe('UpdateManager', function () { appliedOps: this.appliedOps, }) this.RedisManager.promises.updateDocument.resolves() - this.UpdateManager.promises._addMetadataToHistoryUpdates = sinon.stub() + this.UpdateManager.promises._adjustHistoryUpdatesMetadata = sinon.stub() }) describe('normally', function () { @@ -395,7 +395,7 @@ describe('UpdateManager', function () { }) it('should add metadata to the ops', function () { - this.UpdateManager.promises._addMetadataToHistoryUpdates.should.have.been.calledWith( + this.UpdateManager.promises._adjustHistoryUpdatesMetadata.should.have.been.calledWith( this.historyUpdates, this.pathname, this.projectHistoryId, @@ -523,10 +523,10 @@ describe('UpdateManager', function () { }) }) - describe('_addMetadataToHistoryUpdates', function () { - it('should add projectHistoryId, pathname and doc_length metadata to the ops', function () { - const lines = ['some', 'test', 'data'] - const historyUpdates = [ + describe('_adjustHistoryUpdatesMetadata', function () { + beforeEach(function () { + this.lines = ['some', 'test', 'data'] + this.historyUpdates = [ { v: 42, op: [ @@ -552,21 +552,81 @@ describe('UpdateManager', function () { }, { v: 49, op: [{ i: 'penguin', p: 18 }] }, ] - const ranges = { + this.ranges = { changes: [ { op: { d: 'bingbong', p: 12 } }, { op: { i: 'test', p: 5 } }, ], } - this.UpdateManager._addMetadataToHistoryUpdates( - historyUpdates, + }) + + it('should add projectHistoryId, pathname and doc_length metadata to the ops', function () { + this.UpdateManager._adjustHistoryUpdatesMetadata( + this.historyUpdates, this.pathname, this.projectHistoryId, - lines, - ranges, + this.lines, + this.ranges, + false + ) + this.historyUpdates.should.deep.equal([ + { + projectHistoryId: this.projectHistoryId, + v: 42, + op: [ + { i: 'bing', p: 12, trackedDeleteRejection: true }, + { i: 'foo', p: 4 }, + { i: 'bar', p: 6 }, + ], + meta: { + pathname: this.pathname, + doc_length: 14, + }, + }, + { + projectHistoryId: this.projectHistoryId, + v: 45, + op: [ + { d: 'qux', p: 4 }, + { i: 'bazbaz', p: 14 }, + { d: 'bong', p: 28, u: true }, + ], + meta: { + pathname: this.pathname, + doc_length: 24, // 14 + 'bing' + 'foo' + 'bar' + }, + }, + { + projectHistoryId: this.projectHistoryId, + v: 47, + op: [{ d: 'so', p: 0 }], + meta: { + pathname: this.pathname, + doc_length: 23, // 24 - 'qux' + 'bazbaz' - 'bong' + }, + }, + { + projectHistoryId: this.projectHistoryId, + v: 49, + op: [{ i: 'penguin', p: 18 }], + meta: { + pathname: this.pathname, + doc_length: 21, // 23 - 'so' + }, + }, + ]) + }) + + it('should add additional metadata when ranges support is enabled', function () { + this.UpdateManager._adjustHistoryUpdatesMetadata( + this.historyUpdates, + this.pathname, + this.projectHistoryId, + this.lines, + this.ranges, true ) - historyUpdates.should.deep.equal([ + this.historyUpdates.should.deep.equal([ { projectHistoryId: this.projectHistoryId, v: 42, diff --git a/services/project-history/app/js/UpdateCompressor.js b/services/project-history/app/js/UpdateCompressor.js index d6d6867b1d..86fcd76dd9 100644 --- a/services/project-history/app/js/UpdateCompressor.js +++ b/services/project-history/app/js/UpdateCompressor.js @@ -1,6 +1,15 @@ +// @ts-check + import OError from '@overleaf/o-error' import DMP from 'diff-match-patch' +/** + * @typedef {import('./types').DeleteOp} DeleteOp + * @typedef {import('./types').InsertOp} InsertOp + * @typedef {import('./types').Op} Op + * @typedef {import('./types').Update} Update + */ + const MAX_TIME_BETWEEN_UPDATES = 60 * 1000 // one minute const MAX_UPDATE_SIZE = 2 * 1024 * 1024 // 2 MB const ADDED = 1 @@ -31,47 +40,82 @@ const mergeUpdatesWithOp = function (firstUpdate, secondUpdate, op) { return update } -const adjustLengthByOp = function (length, op) { - if (op.i != null) { - return length + op.i.length - } else if (op.d != null) { - return length - op.d.length - } else if (op.c != null) { +/** + * Adjust the given length to account for the given op + * + * The resulting length is the new length of the doc after the op is applied. + * + * @param {number} length + * @param {Op} op + * @param {object} opts + * @param {boolean} [opts.tracked] - whether or not the update is a tracked change + * @returns {number} the adjusted length + */ +function adjustLengthByOp(length, op, opts = {}) { + if ('i' in op && op.i != null) { + if (op.trackedDeleteRejection) { + // Tracked delete rejection: will be translated into a retain + return length + } else { + return length + op.i.length + } + } else if ('d' in op && op.d != null) { + if (opts.tracked && op.u == null) { + // Tracked delete: will be translated into a retain, except where it overlaps tracked inserts. + for (const change of op.trackedChanges ?? []) { + if (change.type === 'insert') { + length -= change.length + } + } + return length + } else { + return length - op.d.length + } + } else if ('c' in op && op.c != null) { return length } else { throw new OError('unexpected op type') } } -// Updates come from the doc updater in format -// { -// op: [ { ... op1 ... }, { ... op2 ... } ] -// meta: { ts: ..., user_id: ... } -// } -// but it's easier to work with on op per update, so convert these updates to -// our compressed format -// [{ -// op: op1 -// meta: { ts: ..., user_id: ... } -// }, { -// op: op2 -// meta: { ts: ..., user_id: ... } -// }] +/** + * Updates come from the doc updater in format + * { + * op: [ { ... op1 ... }, { ... op2 ... } ] + * meta: { ts: ..., user_id: ... } + * } + * but it's easier to work with on op per update, so convert these updates to + * our compressed format + * [{ + * op: op1 + * meta: { ts: ..., user_id: ... } + * }, { + * op: op2 + * meta: { ts: ..., user_id: ... } + * }] + * + * @param {Update[]} updates + * @returns {Update[]} single op updates + */ export function convertToSingleOpUpdates(updates) { const splitUpdates = [] for (const update of updates) { - if (update.op == null) { + if (!('op' in update)) { // Not a text op, likely a project strucure op splitUpdates.push(update) continue } const ops = update.op - let { doc_length: docLength } = update.meta + + let docLength = update.meta.history_doc_length ?? update.meta.doc_length for (const op of ops) { const splitUpdate = cloneWithOp(update, op) if (docLength != null) { splitUpdate.meta.doc_length = docLength - docLength = adjustLengthByOp(docLength, op) + docLength = adjustLengthByOp(docLength, op, { + tracked: update.meta.tc != null, + }) + delete splitUpdate.meta.history_doc_length } splitUpdates.push(splitUpdate) } @@ -146,13 +190,21 @@ export function compressUpdates(updates) { return compressedUpdates } +/** + * If possible, merge two updates into a single update that has the same effect. + * + * It's useful to do some of this work at this point while we're dealing with + * document-updater updates. The deletes, in particular include the deleted + * text. This allows us to find pieces of inserts and deletes that cancel each + * other out because they insert/delete the exact same text. This compression + * makes the diff smaller. + */ function _concatTwoUpdates(firstUpdate, secondUpdate) { // Previously we cloned firstUpdate and secondUpdate at this point but we // can skip this step because whenever they are returned with // modification there is always a clone at that point via // mergeUpdatesWithOp. - let offset if (firstUpdate.op == null || secondUpdate.op == null) { // Project structure ops return [firstUpdate, secondUpdate] @@ -185,6 +237,45 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) { return [firstUpdate, secondUpdate] } + if ( + (firstUpdate.meta.tc == null && secondUpdate.meta.tc != null) || + (firstUpdate.meta.tc != null && secondUpdate.meta.tc == null) + ) { + // One update is tracking changes and the other isn't. Tracking changes + // results in different behaviour in the history, so we need to keep these + // two updates separate. + return [firstUpdate, secondUpdate] + } + + if (Boolean(firstUpdate.op.u) !== Boolean(secondUpdate.op.u)) { + // One update is an undo and the other isn't. If we were to merge the two + // updates, we would have to choose one value for the flag, which would be + // partially incorrect. Moreover, a tracked delete that is also an undo is + // treated as a tracked insert rejection by the history, so these updates + // need to be well separated. + return [firstUpdate, secondUpdate] + } + + if ( + firstUpdate.op.trackedDeleteRejection || + secondUpdate.op.trackedDeleteRejection + ) { + // Do not merge tracked delete rejections. Each tracked delete rejection is + // a separate operation. + return [firstUpdate, secondUpdate] + } + + if ( + firstUpdate.op.trackedChanges != null || + secondUpdate.op.trackedChanges != null + ) { + // Do not merge ops that span tracked changes. + // TODO: This could theoretically be handled, but it would be complex. One + // would need to take tracked deletes into account when merging inserts and + // deletes together. + return [firstUpdate, secondUpdate] + } + const firstOp = firstUpdate.op const secondOp = secondUpdate.op const firstSize = @@ -206,26 +297,38 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) { ) { return [ mergeUpdatesWithOp(firstUpdate, secondUpdate, { - p: firstOp.p, + ...firstOp, i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i), }), ] - // Two deletes - } else if ( + } + + // Two deletes + if ( firstOp.d != null && secondOp.d != null && firstOpInsideSecondOp && - combinedLengthUnderLimit + combinedLengthUnderLimit && + firstUpdate.meta.tc == null && + secondUpdate.meta.tc == null ) { return [ mergeUpdatesWithOp(firstUpdate, secondUpdate, { - p: secondOp.p, + ...secondOp, d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d), }), ] - // An insert and then a delete - } else if (firstOp.i != null && secondOp.d != null && secondOpInsideFirstOp) { - offset = secondOp.p - firstOp.p + } + + // An insert and then a delete + if ( + firstOp.i != null && + secondOp.d != null && + secondOpInsideFirstOp && + firstUpdate.meta.tc == null && + secondUpdate.meta.tc == null + ) { + const offset = secondOp.p - firstOp.p const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length) // Only trim the insert when the delete is fully contained within in it if (insertedText === secondOp.d) { @@ -235,7 +338,7 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) { } else { return [ mergeUpdatesWithOp(firstUpdate, secondUpdate, { - p: firstOp.p, + ...firstOp, i: insert, }), ] @@ -244,35 +347,60 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) { // This will only happen if the delete extends outside the insert return [firstUpdate, secondUpdate] } + } - // A delete then an insert at the same place, likely a copy-paste of a chunk of content - } else if ( + // A delete then an insert at the same place, likely a copy-paste of a chunk of content + if ( firstOp.d != null && secondOp.i != null && - firstOp.p === secondOp.p + firstOp.p === secondOp.p && + firstUpdate.meta.tc == null && + secondUpdate.meta.tc == null ) { - offset = firstOp.p + const offset = firstOp.p + const hoffset = firstOp.hpos const diffUpdates = diffAsShareJsOps(firstOp.d, secondOp.i).map( function (op) { - op.p += offset + // diffAsShareJsOps() returns ops with positions relative to the position + // of the copy/paste. We need to adjust these positions so that they + // apply to the whole document instead. + const pos = op.p + op.p = pos + offset + if (hoffset != null) { + op.hpos = pos + hoffset + } + if (firstOp.u && secondOp.u) { + op.u = true + } return mergeUpdatesWithOp(firstUpdate, secondUpdate, op) } ) // Doing a diff like this loses track of the doc lengths for each // update, so recalculate them - let { doc_length: docLength } = firstUpdate.meta + let docLength = + firstUpdate.meta.history_doc_length ?? firstUpdate.meta.doc_length for (const update of diffUpdates) { update.meta.doc_length = docLength - docLength = adjustLengthByOp(docLength, update.op) + docLength = adjustLengthByOp(docLength, update.op, { + tracked: update.meta.tc != null, + }) + delete update.meta.history_doc_length } return diffUpdates - } else { - return [firstUpdate, secondUpdate] } + + return [firstUpdate, secondUpdate] } +/** + * Return the diff between two strings + * + * @param {string} before + * @param {string} after + * @returns {(InsertOp | DeleteOp)[]} the ops that generate that diff + */ export function diffAsShareJsOps(before, after) { const diffs = dmp.diff_main(before, after) dmp.diff_cleanupSemantic(diffs) diff --git a/services/project-history/app/js/UpdateTranslator.js b/services/project-history/app/js/UpdateTranslator.js index f17d42801a..1c6834d330 100644 --- a/services/project-history/app/js/UpdateTranslator.js +++ b/services/project-history/app/js/UpdateTranslator.js @@ -6,17 +6,19 @@ import * as Errors from './Errors.js' import * as OperationsCompressor from './OperationsCompressor.js' /** - * @typedef {import('./types.ts').AddDocUpdate} AddDocUpdate - * @typedef {import('./types.ts').AddFileUpdate} AddFileUpdate - * @typedef {import('./types.ts').CommentOp} CommentOp - * @typedef {import('./types.ts').DeleteOp} DeleteOp - * @typedef {import('./types.ts').InsertOp} InsertOp - * @typedef {import('./types.ts').Op} Op - * @typedef {import('./types.ts').RenameUpdate} RenameUpdate - * @typedef {import('./types.ts').TextUpdate} TextUpdate - * @typedef {import('./types.ts').DeleteCommentUpdate} DeleteCommentUpdate - * @typedef {import('./types.ts').Update} Update - * @typedef {import('./types.ts').UpdateWithBlob} UpdateWithBlob + * @typedef {import('./types').AddDocUpdate} AddDocUpdate + * @typedef {import('./types').AddFileUpdate} AddFileUpdate + * @typedef {import('./types').CommentOp} CommentOp + * @typedef {import('./types').DeleteOp} DeleteCommentUpdate + * @typedef {import('./types').DeleteOp} DeleteOp + * @typedef {import('./types').InsertOp} InsertOp + * @typedef {import('./types').Op} Op + * @typedef {import('./types').RawScanOp} RawScanOp + * @typedef {import('./types').RenameUpdate} RenameUpdate + * @typedef {import('./types').TextUpdate} TextUpdate + * @typedef {import('./types').TrackingProps} TrackingProps + * @typedef {import('./types').Update} Update + * @typedef {import('./types').UpdateWithBlob} UpdateWithBlob */ /** @@ -63,15 +65,14 @@ function _convertToChange(projectId, updateWithBlob) { ] projectVersion = update.version } else if (isTextUpdate(update)) { - const docLength = update.meta.doc_length + const docLength = update.meta.history_doc_length ?? update.meta.doc_length let pathname = update.meta.pathname pathname = _convertPathname(pathname) const builder = new OperationsBuilder(docLength, pathname) // convert ops for (const op of update.op) { - // if this throws an exception it will be caught in convertToChanges - builder.addOp(op) + builder.addOp(op, update) } operations = builder.finish() // add doc version information if present @@ -232,7 +233,7 @@ class OperationsBuilder { /** * Currently built text operation * - * @type {(number | string)[]} + * @type {RawScanOp[]} */ this.textOperation = [] @@ -247,9 +248,15 @@ class OperationsBuilder { /** * @param {Op} op + * @param {Update} update * @returns {void} */ - addOp(op) { + addOp(op, update) { + // We sometimes receive operations that operate at positions outside the + // docLength. Document updater coerces the position to the end of the + // document. We do the same here. + const pos = Math.min(op.hpos ?? op.p, this.docLength) + if (isComment(op)) { // Close the current text operation this.pushTextOperation() @@ -260,8 +267,8 @@ class OperationsBuilder { commentId: op.t, ranges: [ { - pos: op.p, - length: op.c.length, + pos, + length: op.hlen ?? op.c.length, }, ], }) @@ -272,11 +279,6 @@ class OperationsBuilder { throw new Errors.UnexpectedOpTypeError('unexpected op type', { op }) } - // We sometimes receive operations that operate at positions outside the - // docLength. Document updater coerces the position to the end of the - // document. We do the same here. - const pos = Math.min(op.p, this.docLength) - if (pos < this.cursor) { this.pushTextOperation() // At this point, this.cursor === 0 and we can continue @@ -287,26 +289,128 @@ class OperationsBuilder { } if (isInsert(op)) { - this.insert(op.i) + if (op.trackedDeleteRejection) { + this.retain(op.i.length, { + tracking: { + type: 'none', + userId: update.meta.user_id, + ts: new Date(update.meta.ts).toISOString(), + }, + }) + } else if (update.meta.tc != null) { + this.insert(op.i, { + tracking: { + type: 'insert', + userId: update.meta.user_id, + ts: new Date(update.meta.ts).toISOString(), + }, + }) + } else { + this.insert(op.i) + } } if (isDelete(op)) { - this.delete(op.d.length) + const changes = op.trackedChanges ?? [] + + // Tracked changes should already be ordered by offset, but let's make + // sure they are. + changes.sort((a, b) => { + const posOrder = a.offset - b.offset + if (posOrder !== 0) { + return posOrder + } else if (a.type === 'insert' && b.type === 'delete') { + return 1 + } else if (a.type === 'delete' && b.type === 'insert') { + return -1 + } else { + return 0 + } + }) + + let offset = 0 + for (const change of changes) { + if (change.offset > offset) { + // Handle the portion before the tracked change + if (update.meta.tc != null && op.u == null) { + // This is a tracked delete + this.retain(change.offset - offset, { + tracking: { + type: 'delete', + userId: update.meta.user_id, + ts: new Date(update.meta.ts).toISOString(), + }, + }) + } else { + // This is a regular delete + this.delete(change.offset - offset) + } + offset = change.offset + } + + // Now, handle the portion inside the tracked change + if (change.type === 'delete') { + // Tracked deletes are skipped over when deleting + this.retain(change.length) + } else if (change.type === 'insert') { + // Deletes inside tracked inserts are always regular deletes + this.delete(change.length) + offset += change.length + } + } + if (offset < op.d.length) { + // Handle the portion after the last tracked change + if (update.meta.tc != null && op.u == null) { + // This is a tracked delete + this.retain(op.d.length - offset, { + tracking: { + type: 'delete', + userId: update.meta.user_id, + ts: new Date(update.meta.ts).toISOString(), + }, + }) + } else { + // This is a regular delete + this.delete(op.d.length - offset) + } + } } } - retain(length) { - this.textOperation.push(length) + /** + * @param {number} length + * @param {object} opts + * @param {TrackingProps} [opts.tracking] + */ + retain(length, opts = {}) { + if (opts.tracking) { + this.textOperation.push({ r: length, ...opts }) + } else { + this.textOperation.push(length) + } this.cursor += length } - insert(str) { - this.textOperation.push(str) + /** + * @param {string} str + * @param {object} opts + * @param {TrackingProps} [opts.tracking] + */ + insert(str, opts = {}) { + if (opts.tracking) { + this.textOperation.push({ i: str, ...opts }) + } else { + this.textOperation.push(str) + } this.cursor += str.length this.docLength += str.length } - delete(length) { + /** + * @param {number} length + * @param {object} opts + */ + delete(length, opts = {}) { this.textOperation.push(-length) this.docLength -= length } diff --git a/services/project-history/app/js/types.ts b/services/project-history/app/js/types.ts index b1af671335..5d3365251f 100644 --- a/services/project-history/app/js/types.ts +++ b/services/project-history/app/js/types.ts @@ -2,10 +2,11 @@ export type Update = TextUpdate | AddDocUpdate | AddFileUpdate | RenameUpdate | export type UpdateMeta = { user_id: string - ts: string + ts: number source?: string type?: string origin?: RawOrigin + tc?: string } export type TextUpdate = { @@ -15,6 +16,7 @@ export type TextUpdate = { meta: UpdateMeta & { pathname: string doc_length: number + history_doc_length?: number } } @@ -52,17 +54,31 @@ export type Op = InsertOp | DeleteOp | CommentOp export type InsertOp = { i: string p: number + u?: boolean + hpos?: number + trackedDeleteRejection?: boolean } export type DeleteOp = { d: string p: number + u?: boolean + hpos?: number + trackedChanges?: TrackedChangesInsideDelete[] +} + +export type TrackedChangesInsideDelete = { + type: 'insert' | 'delete' + offset: number + length: number } export type CommentOp = { c: string p: number t: string + hpos?: number + hlen?: number } export type UpdateWithBlob = { @@ -73,3 +89,16 @@ export type UpdateWithBlob = { export type RawOrigin = { kind: string } + +export type TrackingProps = { + type: 'insert' | 'delete' | 'none' + userId: string + ts: string +} + +export type RawScanOp = + | number + | string + | { r: number; tracking?: TrackingProps } + | { i: string; tracking?: TrackingProps; commentIds?: string[] } + | { d: number; tracking?: TrackingProps } diff --git a/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js b/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js index 727d3fd935..ad64125705 100644 --- a/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js +++ b/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js @@ -148,6 +148,61 @@ describe('UpdateCompressor', function () { }, ]) }) + + it('should take tracked changes into account when calculating the doc length', function () { + const meta = { + ts: this.ts1, + user_id: this.user_id, + tc: 'tracked-change-id', + } + expect( + this.UpdateCompressor.convertToSingleOpUpdates([ + { + op: [ + { p: 6, i: 'orange' }, // doc_length += 6 + { p: 22, d: 'apple' }, // doc_length doesn't change + { p: 12, i: 'melon', u: true }, // doc_length += 5 + { p: 18, i: 'banana', u: true, trackedDeleteRejection: true }, // doc_length doesn't change + { p: 8, d: 'pineapple', u: true }, // doc_length -= 9 + { p: 11, i: 'fruit salad' }, + ], + meta: { ...meta, doc_length: 20, history_doc_length: 30 }, + v: 42, + }, + ]) + ).to.deep.equal([ + { + op: { p: 6, i: 'orange' }, + meta: { ...meta, doc_length: 30 }, + v: 42, + }, + { + op: { p: 22, d: 'apple' }, + meta: { ...meta, doc_length: 36 }, + v: 42, + }, + { + op: { p: 12, i: 'melon', u: true }, + meta: { ...meta, doc_length: 36 }, + v: 42, + }, + { + op: { p: 18, i: 'banana', u: true, trackedDeleteRejection: true }, + meta: { ...meta, doc_length: 41 }, + v: 42, + }, + { + op: { p: 8, d: 'pineapple', u: true }, + meta: { ...meta, doc_length: 41 }, + v: 42, + }, + { + op: { p: 11, i: 'fruit salad' }, + meta: { ...meta, doc_length: 32 }, + v: 42, + }, + ]) + }) }) describe('concatUpdatesWithSameVersion', function () { @@ -571,6 +626,153 @@ describe('UpdateCompressor', function () { }, ]) }) + + it("should not merge updates that track changes and updates that don't", function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar' }, + meta: { ts: this.ts2, user_id: this.user_id, tc: 'tracking-id' }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar' }, + meta: { ts: this.ts2, user_id: this.user_id, tc: 'tracking-id' }, + v: 43, + }, + ]) + }) + + it('should not merge undos with regular ops', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar', u: true }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar', u: true }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + }) + + it('should not merge tracked delete rejections', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar', trackedDeleteRejection: true }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + pathname: 'main.tex', + op: { p: 3, i: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + pathname: 'main.tex', + op: { p: 6, i: 'bar', trackedDeleteRejection: true }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + }) + + it('should preserve history metadata', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, i: 'foo', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { p: 6, i: 'bar', hpos: 16 }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, i: 'foobar', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 43, + }, + ]) + }) + + it('should not merge updates from different users', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, i: 'foo', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { p: 6, i: 'bar', hpos: 16 }, + meta: { ts: this.ts2, user_id: this.other_user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, i: 'foo', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { p: 6, i: 'bar', hpos: 16 }, + meta: { ts: this.ts2, user_id: this.other_user_id }, + v: 43, + }, + ]) + }) }) describe('delete - delete', function () { @@ -647,6 +849,95 @@ describe('UpdateCompressor', function () { }, ]) }) + + it('should not merge deletes over tracked changes', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, d: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { + p: 3, + d: 'bar', + trackedChanges: [{ type: 'delete', pos: 2, length: 10 }], + }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, d: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { + p: 3, + d: 'bar', + trackedChanges: [{ type: 'delete', pos: 2, length: 10 }], + }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + }) + + it('should preserve history metadata', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, d: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { p: 3, d: 'bar' }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, d: 'foobar' }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 43, + }, + ]) + }) + + it('should not merge when the deletes are tracked', function () { + // TODO: We should be able to lift that constraint, but it would + // require recalculating the hpos on the second op. + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, d: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id, tc: 'tracking-id' }, + v: 42, + }, + { + op: { p: 3, d: 'bar' }, + meta: { ts: this.ts2, user_id: this.user_id, tc: 'tracking-id' }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, d: 'foo' }, + meta: { ts: this.ts1, user_id: this.user_id, tc: 'tracking-id' }, + v: 42, + }, + { + op: { p: 3, d: 'bar' }, + meta: { ts: this.ts2, user_id: this.user_id, tc: 'tracking-id' }, + v: 43, + }, + ]) + }) }) describe('insert - delete', function () { @@ -768,6 +1059,29 @@ describe('UpdateCompressor', function () { }, ]) }) + + it('should preserver history metadata', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, i: 'foo', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 42, + }, + { + op: { p: 5, d: 'o', hpos: 15 }, + meta: { ts: this.ts2, user_id: this.user_id }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, i: 'fo', hpos: 13 }, + meta: { ts: this.ts1, user_id: this.user_id }, + v: 43, + }, + ]) + }) }) describe('delete - insert', function () { @@ -815,6 +1129,90 @@ describe('UpdateCompressor', function () { ]) ).to.deep.equal([]) }) + + it('should preserver history metadata', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { + p: 3, + d: 'one two three four five six seven eight', + hpos: 13, + }, + meta: { ts: this.ts1, user_id: this.user_id, doc_length: 100 }, + v: 42, + }, + { + op: { + p: 3, + i: 'one 2 three four five six seven eight', + hpos: 13, + }, + meta: { ts: this.ts2, user_id: this.user_id, doc_length: 100 }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 7, d: 'two', hpos: 17 }, + meta: { ts: this.ts1, user_id: this.user_id, doc_length: 100 }, + v: 43, + }, + { + op: { p: 7, i: '2', hpos: 17 }, + meta: { ts: this.ts1, user_id: this.user_id, doc_length: 97 }, + v: 43, + }, + ]) + }) + + it('should not merge when tracking changes', function () { + expect( + this.UpdateCompressor.compressUpdates([ + { + op: { p: 3, d: 'one two three four five six seven eight' }, + meta: { + ts: this.ts1, + user_id: this.user_id, + doc_length: 100, + tc: 'tracking-id', + }, + v: 42, + }, + { + op: { p: 3, i: 'one 2 three four five six seven eight' }, + meta: { + ts: this.ts2, + user_id: this.user_id, + doc_length: 100, + tc: 'tracking-id', + }, + v: 43, + }, + ]) + ).to.deep.equal([ + { + op: { p: 3, d: 'one two three four five six seven eight' }, + meta: { + ts: this.ts1, + user_id: this.user_id, + doc_length: 100, + tc: 'tracking-id', + }, + v: 42, + }, + { + op: { p: 3, i: 'one 2 three four five six seven eight' }, + meta: { + ts: this.ts2, + user_id: this.user_id, + doc_length: 100, + tc: 'tracking-id', + }, + v: 43, + }, + ]) + }) }) describe('a long chain of ops', function () { diff --git a/services/project-history/test/unit/js/UpdateTranslator/UpdateTranslatorTests.js b/services/project-history/test/unit/js/UpdateTranslator/UpdateTranslatorTests.js index 2ce7b279ef..ba9a064320 100644 --- a/services/project-history/test/unit/js/UpdateTranslator/UpdateTranslatorTests.js +++ b/services/project-history/test/unit/js/UpdateTranslator/UpdateTranslatorTests.js @@ -831,5 +831,276 @@ describe('UpdateTranslator', function () { }).to.throw('unexpected op type') }) }) + + describe('text updates with history metadata', function () { + it('handles deletes over tracked deletes', function () { + const updates = [ + { + update: { + doc: this.doc_id, + op: [ + { i: 'foo', p: 3, hpos: 5 }, + { + d: 'quux', + p: 10, + hpos: 15, + trackedChanges: [ + { type: 'delete', offset: 2, length: 3 }, + { type: 'delete', offset: 3, length: 1 }, + ], + }, + { c: 'noteworthy', p: 8, t: 'comment-id', hpos: 11, hlen: 14 }, + ], + v: this.version, + meta: { + user_id: this.user_id, + ts: new Date(this.timestamp).getTime(), + pathname: '/main.tex', + doc_length: 20, + history_doc_length: 30, + source: 'some-editor-id', + }, + }, + }, + ] + + const changes = this.UpdateTranslator.convertToChanges( + this.project_id, + updates + ).map(change => change.toRaw()) + + expect(changes).to.deep.equal([ + { + authors: [], + operations: [ + { + pathname: 'main.tex', + textOperation: [5, 'foo', 7, -2, 3, -1, 1, -1, 10], + }, + { + pathname: 'main.tex', + commentId: 'comment-id', + ranges: [{ pos: 11, length: 14 }], + resolved: false, + }, + ], + v2Authors: [this.user_id], + timestamp: this.timestamp, + v2DocVersions: { + '59bfd450e3028c4d40a1e9ab': { + pathname: 'main.tex', + v: 0, + }, + }, + }, + ]) + }) + + it('handles tracked delete rejections specially', function () { + const updates = [ + { + update: { + doc: this.doc_id, + op: [{ i: 'foo', p: 3, trackedDeleteRejection: true }], + v: this.version, + meta: { + user_id: this.user_id, + ts: new Date(this.timestamp).getTime(), + pathname: '/main.tex', + doc_length: 20, + source: 'some-editor-id', + }, + }, + }, + ] + + const changes = this.UpdateTranslator.convertToChanges( + this.project_id, + updates + ).map(change => change.toRaw()) + + expect(changes).to.deep.equal([ + { + authors: [], + operations: [ + { + pathname: 'main.tex', + textOperation: [ + 3, + { + r: 3, + tracking: { + type: 'none', + userId: this.user_id, + ts: new Date(this.timestamp).toISOString(), + }, + }, + 14, + ], + }, + ], + v2Authors: [this.user_id], + timestamp: this.timestamp, + v2DocVersions: { + '59bfd450e3028c4d40a1e9ab': { + pathname: 'main.tex', + v: 0, + }, + }, + }, + ]) + }) + + it('handles tracked changes', function () { + const updates = [ + { + update: { + doc: this.doc_id, + op: [ + { i: 'inserted', p: 5 }, + { d: 'deleted', p: 20 }, + { i: 'rejected deletion', p: 30, trackedDeleteRejection: true }, + { d: 'rejected insertion', p: 50, u: true }, + ], + v: this.version, + meta: { + tc: 'tracked-change-id', + user_id: this.user_id, + ts: new Date(this.timestamp).getTime(), + pathname: '/main.tex', + doc_length: 70, + source: 'some-editor-id', + }, + }, + }, + ] + + const changes = this.UpdateTranslator.convertToChanges( + this.project_id, + updates + ).map(change => change.toRaw()) + + expect(changes).to.deep.equal([ + { + authors: [], + operations: [ + { + pathname: 'main.tex', + textOperation: [ + 5, + { + i: 'inserted', + tracking: { + type: 'insert', + userId: this.user_id, + ts: new Date(this.timestamp).toISOString(), + }, + }, + 7, + { + r: 7, + tracking: { + type: 'delete', + userId: this.user_id, + ts: new Date(this.timestamp).toISOString(), + }, + }, + 3, + { + r: 17, + tracking: { + type: 'none', + userId: this.user_id, + ts: new Date(this.timestamp).toISOString(), + }, + }, + 3, + -18, + 10, + ], + }, + ], + v2Authors: [this.user_id], + timestamp: this.timestamp, + v2DocVersions: { + '59bfd450e3028c4d40a1e9ab': { + pathname: 'main.tex', + v: 0, + }, + }, + }, + ]) + }) + + it('handles a delete over a mix of tracked inserts and tracked deletes', function () { + const updates = [ + { + update: { + doc: this.doc_id, + op: [ + { + d: 'abcdef', + p: 10, + trackedChanges: [ + { type: 'insert', offset: 0, length: 3 }, + { type: 'delete', offset: 2, length: 10 }, + { type: 'insert', offset: 2, length: 2 }, + ], + }, + ], + v: this.version, + meta: { + tc: 'tracking-id', + user_id: this.user_id, + ts: new Date(this.timestamp).getTime(), + pathname: '/main.tex', + doc_length: 20, + history_doc_length: 30, + source: 'some-editor-id', + }, + }, + }, + ] + + const changes = this.UpdateTranslator.convertToChanges( + this.project_id, + updates + ).map(change => change.toRaw()) + + expect(changes).to.deep.equal([ + { + authors: [], + operations: [ + { + pathname: 'main.tex', + textOperation: [ + 10, + -3, + 10, + -2, + { + r: 1, + tracking: { + type: 'delete', + userId: this.user_id, + ts: this.timestamp, + }, + }, + 4, + ], + }, + ], + v2Authors: [this.user_id], + timestamp: this.timestamp, + v2DocVersions: { + '59bfd450e3028c4d40a1e9ab': { + pathname: 'main.tex', + v: 0, + }, + }, + }, + ]) + }) + }) }) })