From 36ad15c4057b83364564b4503534ef25e770876d Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Fri, 19 Apr 2024 14:05:27 +0100 Subject: [PATCH] Merge pull request #17859 from overleaf/mj-project-history-ranges-at-version [overleaf-editor-core+project-history] Add endpoint to fetch ranges from version GitOrigin-RevId: fbe8e8ef3636b344006375a92057cfc580a74616 --- libraries/overleaf-editor-core/lib/file.js | 9 + .../lib/file_data/index.js | 13 + .../lib/file_data/string_file_data.js | 5 + .../project-history/app/js/HttpController.js | 15 + services/project-history/app/js/Router.js | 5 + .../project-history/app/js/SnapshotManager.js | 149 +++++++ services/project-history/app/js/types.ts | 23 ++ .../SnapshotManager/SnapshotManagerTests.js | 375 ++++++++++++++++++ 8 files changed, 594 insertions(+) diff --git a/libraries/overleaf-editor-core/lib/file.js b/libraries/overleaf-editor-core/lib/file.js index b5fb0c6ffa..9e0f419ad0 100644 --- a/libraries/overleaf-editor-core/lib/file.js +++ b/libraries/overleaf-editor-core/lib/file.js @@ -17,6 +17,7 @@ const StringFileData = require('./file_data/string_file_data') * @typedef {import("./types").CommentRawData} CommentRawData * @typedef {import("./file_data/comment_list")} CommentList * @typedef {import("./operation/text_operation")} TextOperation + * @typedef {import("./file_data/tracked_change_list")} TrackedChangeList * @typedef {{filterTrackedDeletes?: boolean}} FileGetContentOptions */ @@ -210,6 +211,14 @@ class File { return this.data.getComments() } + /** + * Get the tracked changes for this file. + * @return {TrackedChangeList} + */ + getTrackedChanges() { + return this.data.getTrackedChanges() + } + /** * Clone a file. * diff --git a/libraries/overleaf-editor-core/lib/file_data/index.js b/libraries/overleaf-editor-core/lib/file_data/index.js index 5137c778f7..792fc802d6 100644 --- a/libraries/overleaf-editor-core/lib/file_data/index.js +++ b/libraries/overleaf-editor-core/lib/file_data/index.js @@ -21,6 +21,7 @@ let StringFileData = null * @typedef {import("../operation/edit_operation")} EditOperation * @typedef {import("../file_data/comment_list")} CommentList * @typedef {import("../types").CommentRawData} CommentRawData + * @typedef {import("../file_data/tracked_change_list")} TrackedChangeList */ /** @@ -199,6 +200,18 @@ class FileData { getComments() { throw new Error('getComments not implemented for ' + JSON.stringify(this)) } + + /** + * @see File#getTrackedChanges + * @function + * @return {TrackedChangeList} + * @abstract + */ + getTrackedChanges() { + throw new Error( + 'getTrackedChanges not implemented for ' + JSON.stringify(this) + ) + } } module.exports = FileData diff --git a/libraries/overleaf-editor-core/lib/file_data/string_file_data.js b/libraries/overleaf-editor-core/lib/file_data/string_file_data.js index 6e8859f219..f82b3580a4 100644 --- a/libraries/overleaf-editor-core/lib/file_data/string_file_data.js +++ b/libraries/overleaf-editor-core/lib/file_data/string_file_data.js @@ -112,6 +112,11 @@ class StringFileData extends FileData { return this.comments } + /** @inheritdoc */ + getTrackedChanges() { + return this.trackedChanges + } + /** * @inheritdoc * @returns {Promise} diff --git a/services/project-history/app/js/HttpController.js b/services/project-history/app/js/HttpController.js index ac21c3d95c..643fd4bcde 100644 --- a/services/project-history/app/js/HttpController.js +++ b/services/project-history/app/js/HttpController.js @@ -201,6 +201,21 @@ export function getFileSnapshot(req, res, next) { ) } +export function getRangesSnapshot(req, res, next) { + const { project_id: projectId, version, pathname } = req.params + SnapshotManager.getRangesSnapshot( + projectId, + version, + pathname, + (err, ranges) => { + if (err) { + return next(OError.tag(err)) + } + res.json(ranges) + } + ) +} + export function getProjectSnapshot(req, res, next) { const { project_id: projectId, version } = req.params SnapshotManager.getProjectSnapshot( diff --git a/services/project-history/app/js/Router.js b/services/project-history/app/js/Router.js index 5d514b3fba..a7f420ad88 100644 --- a/services/project-history/app/js/Router.js +++ b/services/project-history/app/js/Router.js @@ -143,6 +143,11 @@ export function initialize(app) { HttpController.getFileSnapshot ) + app.get( + '/project/:project_id/ranges/version/:version/:pathname', + HttpController.getRangesSnapshot + ) + app.get( '/project/:project_id/version/:version', HttpController.getProjectSnapshot diff --git a/services/project-history/app/js/SnapshotManager.js b/services/project-history/app/js/SnapshotManager.js index 95e69bdd1a..0f35c36de6 100644 --- a/services/project-history/app/js/SnapshotManager.js +++ b/services/project-history/app/js/SnapshotManager.js @@ -11,6 +11,7 @@ import * as Errors from './Errors.js' /** * @typedef {import('stream').Readable} ReadableStream * @typedef {import('overleaf-editor-core').Snapshot} Snapshot + * @typedef {import('./types').RangesSnapshot} RangesSnapshot */ StringStream.prototype._read = function () {} @@ -50,6 +51,151 @@ async function getFileSnapshotStream(projectId, version, pathname) { } } +/** + * Constructs a snapshot of the ranges in a document-updater compatible format. + * Positions will be relative to a document where tracked deletes have been + * removed from the string. This also means that if a tracked delete overlaps + * a comment range, the comment range will be truncated. + * + * @param {string} projectId + * @param {number} version + * @param {string} pathname + * @returns {Promise} + */ +async function getRangesSnapshot(projectId, version, pathname) { + const snapshot = await _getSnapshotAtVersion(projectId, version) + const file = snapshot.getFile(pathname) + if (!file) { + throw new Errors.NotFoundError(`${pathname} not found`, { + projectId, + version, + pathname, + }) + } + if (!file.isEditable()) { + throw new Error('File is not editable') + } + const historyId = await WebApiManager.promises.getHistoryId(projectId) + await file.load('eager', HistoryStoreManager.getBlobStore(historyId)) + const content = file.getContent() + if (!content) { + throw new Error('Unable to read file contents') + } + const trackedChanges = file.getTrackedChanges().asSorted() + const comments = file.getComments() + const docUpdaterCompatibleTrackedChanges = [] + + let trackedDeletionOffset = 0 + for (const trackedChange of trackedChanges) { + const isTrackedDeletion = trackedChange.tracking.type === 'delete' + const trackedChangeContent = content.slice( + trackedChange.range.start, + trackedChange.range.end + ) + const tcContent = isTrackedDeletion + ? { d: trackedChangeContent } + : { i: trackedChangeContent } + docUpdaterCompatibleTrackedChanges.push({ + op: { + p: trackedChange.range.start - trackedDeletionOffset, + ...tcContent, + }, + metadata: { + ts: trackedChange.tracking.ts.toISOString(), + user_id: trackedChange.tracking.userId, + }, + }) + if (isTrackedDeletion) { + trackedDeletionOffset += trackedChange.range.length + } + } + + // Comments are shifted left by the length of any previous tracked deletions. + // If they overlap with a tracked deletion, they are truncated. + // + // Example: + // { } comment + // [ ] tracked deletion + // the quic[k {b]rown [fox] jum[ps} ove]r the lazy dog + // => rown jum + // starting at position 8 + const trackedDeletions = trackedChanges.filter( + tc => tc.tracking.type === 'delete' + ) + const docUpdaterCompatibleComments = [] + for (const comment of comments) { + trackedDeletionOffset = 0 + let trackedDeletionIndex = 0 + for (const commentRange of comment.ranges) { + let commentRangeContent = '' + let offsetFromOverlappingRangeAtStart = 0 + while ( + trackedDeletionIndex < trackedDeletions.length && + trackedDeletions[trackedDeletionIndex].range.start < + commentRange.start && + trackedDeletions[trackedDeletionIndex].range.end <= commentRange.start + ) { + // Skip over tracked deletions that are before the current comment range + trackedDeletionOffset += + trackedDeletions[trackedDeletionIndex].range.length + trackedDeletionIndex++ + } + + if ( + trackedDeletions[trackedDeletionIndex]?.range.start < commentRange.start + ) { + // There's overlap with a tracked deletion, move the position left and + // truncate the overlap + offsetFromOverlappingRangeAtStart = + commentRange.start - + trackedDeletions[trackedDeletionIndex].range.start + } + + // The position of the comment in the document after tracked deletions + const position = + commentRange.start - + trackedDeletionOffset - + offsetFromOverlappingRangeAtStart + + let cursor = commentRange.start + while (cursor < commentRange.end) { + const trackedDeletion = trackedDeletions[trackedDeletionIndex] + if ( + !trackedDeletion || + trackedDeletion.range.start >= commentRange.end + ) { + // We've run out of relevant tracked changes + commentRangeContent += content.slice(cursor, commentRange.end) + break + } + if (trackedDeletion.range.start > cursor) { + // There's a gap between the current cursor and the tracked deletion + commentRangeContent += content.slice( + cursor, + trackedDeletion.range.start + ) + } + // Skip to the end of the tracked delete + cursor = trackedDeletion.range.end + trackedDeletionIndex++ + trackedDeletionOffset += trackedDeletion.range.length + } + docUpdaterCompatibleComments.push({ + op: { + p: position, + c: commentRangeContent, + t: comment.id, + }, + }) + } + } + + return { + changes: docUpdaterCompatibleTrackedChanges, + comments: docUpdaterCompatibleComments, + } +} + // Returns project snapshot containing the document content for files with // text operations in the relevant chunk, and hashes for unmodified/binary // files. Used by git bridge to get the state of the project. @@ -135,15 +281,18 @@ async function _loadFilesLimit(snapshot, kind, blobStore) { const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream) const getProjectSnapshotCb = callbackify(getProjectSnapshot) const getLatestSnapshotCb = callbackify(getLatestSnapshot) +const getRangesSnapshotCb = callbackify(getRangesSnapshot) export { getFileSnapshotStreamCb as getFileSnapshotStream, getProjectSnapshotCb as getProjectSnapshot, getLatestSnapshotCb as getLatestSnapshot, + getRangesSnapshotCb as getRangesSnapshot, } export const promises = { getFileSnapshotStream, getProjectSnapshot, getLatestSnapshot, + getRangesSnapshot, } diff --git a/services/project-history/app/js/types.ts b/services/project-history/app/js/types.ts index b52b24e20d..f5ac040ac2 100644 --- a/services/project-history/app/js/types.ts +++ b/services/project-history/app/js/types.ts @@ -123,3 +123,26 @@ export type RawScanOp = | { r: number; tracking?: TrackingProps } | { i: string; tracking?: TrackingProps; commentIds?: string[] } | { d: number; tracking?: TrackingProps } + +export type TrackedChangeSnapshot = { + op: { + p: number + } & ({ d: string } | { i: string }) + metadata: { + ts: string + user_id: string + } +} + +export type CommentSnapshot = { + op: { + p: number + t: string + c: string + } +} + +export type RangesSnapshot = { + changes: TrackedChangeSnapshot[] + comments: CommentSnapshot[] +} diff --git a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js index a71e1b12da..a2d24a1cb5 100644 --- a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js +++ b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js @@ -579,4 +579,379 @@ Four five six\ }) }) }) + + describe('getRangesSnapshot', function () { + beforeEach(async function () { + this.WebApiManager.promises.getHistoryId.resolves(this.historyId) + this.HistoryStoreManager.promises.getChunkAtVersion.resolves({ + chunk: (this.chunk = { + history: { + snapshot: { + files: { + 'main.tex': { + hash: (this.fileHash = + '5d2781d78fa5a97b7bafa849fe933dfc9dc93eba'), + rangesHash: (this.rangesHash = + '73061952d41ce54825e2fc1c36b4cf736d5fb62f'), + stringLength: 41, + }, + }, + }, + changes: [], + }, + startVersion: 1, + authors: [ + { + id: 31, + email: 'author@example.com', + name: 'Author', + }, + ], + }), + }) + + this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({ + getString: (this.getString = sinon.stub()), + getObject: (this.getObject = sinon.stub()), + }) + + this.getString.resolves('the quick brown fox jumps over the lazy dog') + }) + + describe('with tracked deletes', function () { + beforeEach(async function () { + this.getObject.resolves({ + trackedChanges: [ + { + // 'quick ' + range: { + pos: 4, + length: 6, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'fox ' + range: { + pos: 16, + length: 4, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'lazy ' + range: { + pos: 35, + length: 5, + }, + tracking: { + type: 'insert', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'dog' + range: { + pos: 40, + length: 3, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + ], + }) + this.data = await this.SnapshotManager.promises.getRangesSnapshot( + this.projectId, + 1, + 'main.tex' + ) + }) + + it("doesn't shift the tracked delete by itself", function () { + expect(this.data.changes[0].op.p).to.eq(4) + }) + + it('should move subsequent tracked changes by the length of previous deletes', function () { + expect(this.data.changes[1].op.p).to.eq(16 - 6) + expect(this.data.changes[2].op.p).to.eq(35 - 6 - 4) + }) + + it("shouldn't move subsequent tracked changes by previous inserts", function () { + expect(this.data.changes[3].op.p).to.eq(40 - 6 - 4) + }) + }) + + describe('with comments and tracked deletes', function () { + beforeEach(async function () { + this.getObject.resolves({ + // the quick brown fox jumps over the lazy dog + trackedChanges: [ + { + // 'e qui' + range: { + pos: 2, + length: 5, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'r' + range: { + pos: 11, + length: 1, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'er th' + range: { + pos: 28, + length: 5, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + ], + comments: [ + { + id: 'comment-1', + ranges: [ + // 'quick' + { + pos: 4, + length: 5, + }, + // 'brown' + { + pos: 10, + length: 5, + }, + // 'over' + { + pos: 26, + length: 4, + }, + ], + resolved: false, + }, + ], + }) + this.data = await this.SnapshotManager.promises.getRangesSnapshot( + this.projectId, + 1, + 'main.tex' + ) + }) + + it('should move the comment to the start of the tracked delete and remove overlapping text', function () { + expect(this.data.comments[0].op.p).to.eq(2) + expect(this.data.comments[0].op.c).to.eq('ck') + }) + + it('should remove overlapping text in middle of comment', function () { + expect(this.data.comments[1].op.p).to.eq(5) + expect(this.data.comments[1].op.c).to.eq('bown') + }) + + it('should remove overlapping text at end of comment', function () { + expect(this.data.comments[2].op.p).to.eq(20) + expect(this.data.comments[2].op.c).to.eq('ov') + }) + }) + + describe('with multiple tracked changes and comments', function () { + beforeEach(async function () { + this.getObject.resolves({ + trackedChanges: [ + { + // 'quick ' + range: { + pos: 4, + length: 6, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2023-01-01T00:00:00.000Z', + }, + }, + { + // 'brown ' + range: { + pos: 10, + length: 6, + }, + tracking: { + type: 'insert', + userId: '31', + ts: '2024-01-01T00:00:00.000Z', + }, + }, + { + // 'lazy ' + range: { + pos: 35, + length: 5, + }, + tracking: { + type: 'delete', + userId: '31', + ts: '2024-01-01T00:00:00.000Z', + }, + }, + ], + comments: [ + { + id: 'comment-1', + // 'quick', 'brown', 'lazy' + ranges: [ + { + pos: 4, + length: 5, + }, + { + pos: 10, + length: 5, + }, + { + pos: 35, + length: 4, + }, + ], + resolved: false, + }, + { + id: 'comment-2', + // 'the', 'the' + ranges: [ + { + pos: 0, + length: 3, + }, + { + pos: 31, + length: 3, + }, + ], + resolved: true, + }, + ], + }) + + this.data = await this.SnapshotManager.promises.getRangesSnapshot( + this.projectId, + 1, + 'main.tex' + ) + }) + + it('looks up ranges', function () { + expect(this.getObject).to.have.been.calledWith(this.rangesHash) + expect(this.getString).to.have.been.calledWith(this.fileHash) + }) + + it('should get the chunk', function () { + expect( + this.HistoryStoreManager.promises.getChunkAtVersion + ).to.have.been.calledWith(this.projectId, this.historyId, 1) + }) + + it('returns the ranges with content and adjusted positions to ignore tracked deletes', function () { + expect(this.data).to.deep.equal({ + changes: [ + { + metadata: { + ts: '2023-01-01T00:00:00.000Z', + user_id: '31', + }, + op: { + d: 'quick ', + p: 4, + }, + }, + { + metadata: { + ts: '2024-01-01T00:00:00.000Z', + user_id: '31', + }, + op: { + i: 'brown ', + p: 4, + }, + }, + { + metadata: { + ts: '2024-01-01T00:00:00.000Z', + user_id: '31', + }, + op: { + d: 'lazy ', + p: 29, + }, + }, + ], + comments: [ + { + op: { + c: '', + p: 4, + t: 'comment-1', + }, + }, + { + op: { + c: 'brown', + p: 4, + t: 'comment-1', + }, + }, + { + op: { + c: '', + p: 29, + t: 'comment-1', + }, + }, + { + op: { + c: 'the', + p: 0, + t: 'comment-2', + }, + }, + { + op: { + c: 'the', + p: 25, + t: 'comment-2', + }, + }, + ], + }) + }) + }) + }) })