From 815c29cf823946f8356b599a029052cb33781101 Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Fri, 5 Apr 2024 11:27:00 +0100 Subject: [PATCH] Merge pull request #17396 from overleaf/mj-version-filtering [overleaf-editor-core+project-history] Filter tracked changes when fetching files GitOrigin-RevId: 935e4c4712f31b77070aec545a849fc6fefedcd9 --- libraries/overleaf-editor-core/lib/file.js | 8 +- .../lib/file_data/index.js | 9 +- .../lib/file_data/lazy_string_file_data.js | 3 +- .../overleaf-editor-core/lib/file_map.js | 9 +- .../overleaf-editor-core/lib/snapshot.js | 5 +- libraries/overleaf-editor-core/lib/types.ts | 2 + .../app/js/HistoryStoreManager.js | 9 +- .../project-history/app/js/SnapshotManager.js | 39 +++- .../SnapshotManager/SnapshotManagerTests.js | 204 +++++++++++++++--- 9 files changed, 235 insertions(+), 53 deletions(-) diff --git a/libraries/overleaf-editor-core/lib/file.js b/libraries/overleaf-editor-core/lib/file.js index db4941bb8a..4fa8394e5d 100644 --- a/libraries/overleaf-editor-core/lib/file.js +++ b/libraries/overleaf-editor-core/lib/file.js @@ -1,3 +1,4 @@ +// @ts-check 'use strict' const _ = require('lodash') @@ -11,6 +12,7 @@ const StringFileData = require('./file_data/string_file_data') /** * @typedef {import("./blob")} Blob * @typedef {import("./types").BlobStore} BlobStore + * @typedef {import("./types").ReadonlyBlobStore} ReadonlyBlobStore * @typedef {import("./types").StringFileRawData} StringFileRawData * @typedef {import("./types").CommentRawData} CommentRawData * @typedef {import("./operation/text_operation")} TextOperation @@ -97,7 +99,7 @@ class File { /** * @param {Blob} blob - * @param {Blob} [blob] + * @param {Blob} [rangesBlob] * @param {Object} [metadata] * @return {File} */ @@ -213,7 +215,7 @@ class File { * @return {File} a new object of the same type */ clone() { - return File.fromRaw(this.toRaw()) + return /** @type {File} */ (File.fromRaw(this.toRaw())) } /** @@ -222,7 +224,7 @@ class File { * operation. * * @param {string} kind - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise.} for this */ async load(kind, blobStore) { diff --git a/libraries/overleaf-editor-core/lib/file_data/index.js b/libraries/overleaf-editor-core/lib/file_data/index.js index 30f7f2e01c..7d743ae34a 100644 --- a/libraries/overleaf-editor-core/lib/file_data/index.js +++ b/libraries/overleaf-editor-core/lib/file_data/index.js @@ -15,6 +15,7 @@ let StringFileData = null /** * @typedef {import("../types").BlobStore} BlobStore + * @typedef {import("../types").ReadonlyBlobStore} ReadonlyBlobStore * @typedef {import("../operation/edit_operation")} EditOperation * @typedef {import("../types").CommentRawData} CommentRawData */ @@ -131,7 +132,7 @@ class FileData { /** * @function - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise} * @abstract * @see FileData#load @@ -142,7 +143,7 @@ class FileData { /** * @function - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise} * @abstract * @see FileData#load @@ -153,7 +154,7 @@ class FileData { /** * @function - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise} * @abstract * @see FileData#load @@ -165,7 +166,7 @@ class FileData { /** * @see File#load * @param {string} kind - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise} */ async load(kind, blobStore) { diff --git a/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js b/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js index 07d2219114..3144159980 100644 --- a/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js +++ b/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js @@ -12,6 +12,7 @@ const EditOperationBuilder = require('../operation/edit_operation_builder') /** * @typedef {import('../types').BlobStore} BlobStore + * @typedef {import('../types').ReadonlyBlobStore} ReadonlyBlobStore * @typedef {import('../types').RangesBlob} RangesBlob */ @@ -106,7 +107,7 @@ class LazyStringFileData extends FileData { /** * @inheritdoc - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @returns {Promise} */ async toEager(blobStore) { diff --git a/libraries/overleaf-editor-core/lib/file_map.js b/libraries/overleaf-editor-core/lib/file_map.js index d5d8d4887b..79939823df 100644 --- a/libraries/overleaf-editor-core/lib/file_map.js +++ b/libraries/overleaf-editor-core/lib/file_map.js @@ -1,3 +1,4 @@ +// @ts-check 'use strict' const _ = require('lodash') @@ -64,11 +65,12 @@ class FileMap { static FileNotFoundError = FileNotFoundError /** - * @param {Object.} files + * @param {Record} files */ constructor(files) { // create bare object for use as Map // http://ryanmorr.com/true-hash-maps-in-javascript/ + /** @type {Record} */ this.files = Object.create(null) _.assign(this.files, files) checkPathnamesAreUnique(this.files) @@ -221,8 +223,9 @@ class FileMap { /** * Map the files in this map to new values. - * @param {function} iteratee - * @return {Object} + * @template T + * @param {(file: File | null) => T} iteratee + * @return {Record} */ map(iteratee) { return _.mapValues(this.files, iteratee) diff --git a/libraries/overleaf-editor-core/lib/snapshot.js b/libraries/overleaf-editor-core/lib/snapshot.js index 46b9b71c62..2d5be82299 100644 --- a/libraries/overleaf-editor-core/lib/snapshot.js +++ b/libraries/overleaf-editor-core/lib/snapshot.js @@ -10,6 +10,7 @@ const FILE_LOAD_CONCURRENCY = 50 /** * @typedef {import("./types").BlobStore} BlobStore + * @typedef {import("./types").ReadonlyBlobStore} ReadonlyBlobStore * @typedef {import("./change")} Change * @typedef {import("./operation/text_operation")} TextOperation */ @@ -167,7 +168,7 @@ class Snapshot { * Ignore recoverable errors (caused by historical bad data) unless opts.strict is true * * @param {Change[]} changes - * @param {object} opts + * @param {object} [opts] * @param {boolean} opts.strict - do not ignore recoverable errors */ applyAll(changes, opts) { @@ -196,7 +197,7 @@ class Snapshot { * Load all of the files in this snapshot. * * @param {string} kind see {File#load} - * @param {BlobStore} blobStore + * @param {ReadonlyBlobStore} blobStore * @return {Promise} an object where keys are the pathnames and * values are the files in the snapshot */ diff --git a/libraries/overleaf-editor-core/lib/types.ts b/libraries/overleaf-editor-core/lib/types.ts index 3227d3baf9..f38696a815 100644 --- a/libraries/overleaf-editor-core/lib/types.ts +++ b/libraries/overleaf-editor-core/lib/types.ts @@ -8,6 +8,8 @@ export type BlobStore = { getObject(hash: string): Promise } +export type ReadonlyBlobStore = Pick + export type RangesBlob = { comments: CommentsListRawData trackedChanges: TrackedChangeRawData[] diff --git a/services/project-history/app/js/HistoryStoreManager.js b/services/project-history/app/js/HistoryStoreManager.js index a2c8f44539..232e24c52a 100644 --- a/services/project-history/app/js/HistoryStoreManager.js +++ b/services/project-history/app/js/HistoryStoreManager.js @@ -361,8 +361,13 @@ class BlobStore { this.projectId = projectId } - getString(hash) { - return getProjectBlobAsync(this.projectId, hash) + async getString(hash) { + return await getProjectBlobAsync(this.projectId, hash) + } + + async getObject(hash) { + const string = await this.getString(hash) + return JSON.parse(string) } } diff --git a/services/project-history/app/js/SnapshotManager.js b/services/project-history/app/js/SnapshotManager.js index 1ebaab143b..fa291d83b1 100644 --- a/services/project-history/app/js/SnapshotManager.js +++ b/services/project-history/app/js/SnapshotManager.js @@ -1,3 +1,4 @@ +// @ts-check import Core from 'overleaf-editor-core' import { Readable as StringStream } from 'stream' import BPromise from 'bluebird' @@ -6,10 +7,19 @@ import * as HistoryStoreManager from './HistoryStoreManager.js' import * as WebApiManager from './WebApiManager.js' import * as Errors from './Errors.js' +/** @typedef {import('overleaf-editor-core').Snapshot} Snapshot */ + StringStream.prototype._read = function () {} const MAX_REQUESTS = 4 // maximum number of parallel requests to v1 history service +/** + * + * @param {string} projectId + * @param {number} version + * @param {string} pathname + * @param {Function} callback + */ export function getFileSnapshotStream(projectId, version, pathname, callback) { _getSnapshotAtVersion(projectId, version, (error, snapshot) => { if (error) { @@ -34,7 +44,7 @@ export function getFileSnapshotStream(projectId, version, pathname, callback) { .load('eager', HistoryStoreManager.getBlobStore(historyId)) .then(() => { const stream = new StringStream() - stream.push(file.getContent()) + stream.push(file.getContent({ filterTrackedDeletes: true })) stream.push(null) callback(null, stream) }) @@ -70,7 +80,18 @@ export function getProjectSnapshot(projectId, version, callback) { .then(() => { const data = { projectId, - files: snapshot.getFileMap().files, + files: snapshot.getFileMap().map(file => { + if (!file) { + return null + } + const content = file.getContent({ + filterTrackedDeletes: true, + }) + if (content === null) { + return { data: { hash: file.getHash() } } + } + return { data: { content } } + }), } callback(null, data) }) @@ -79,6 +100,12 @@ export function getProjectSnapshot(projectId, version, callback) { }) } +/** + * + * @param {string} projectId + * @param {number} version + * @param {Function} callback + */ function _getSnapshotAtVersion(projectId, version, callback) { WebApiManager.getHistoryId(projectId, (error, historyId) => { if (error) { @@ -133,10 +160,10 @@ function _loadFilesLimit(snapshot, kind, blobStore) { return BPromise.map( fileList, file => { - // only load changed files, others can be dereferenced from their - // blobs (this method is only used by the git bridge which - // understands how to load blobs). - if (!file.isEditable() || file.getHash()) { + // only load changed files or files with tracked changes, others can be + // dereferenced from their blobs (this method is only used by the git + // bridge which understands how to load blobs). + if (!file.isEditable() || (file.getHash() && !file.getRangesHash())) { return } return file.load(kind, blobStore) diff --git a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js index 758dddbb18..71cff594dc 100644 --- a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js +++ b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js @@ -44,7 +44,28 @@ describe('SnapshotManager', function () { describe('getFileSnapshotStream', function () { beforeEach(function () { this.WebApiManager.getHistoryId.yields(null, this.historyId) - return this.HistoryStoreManager.getChunkAtVersion.yields(null, { + this.ranges = { + comments: [], + trackedChanges: [ + { + range: { pos: 4, length: 6 }, + tracking: { + userId: 'user-1', + ts: '2024-01-01T00:00:00.000Z', + type: 'delete', + }, + }, + { + range: { pos: 35, length: 5 }, + tracking: { + userId: 'user-1', + ts: '2024-01-01T00:00:00.000Z', + type: 'insert', + }, + }, + ], + } + this.HistoryStoreManager.getChunkAtVersion.yields(null, { chunk: { history: { snapshot: { @@ -53,6 +74,11 @@ describe('SnapshotManager', function () { hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea', stringLength: 41, }, + 'file_with_ranges.tex': { + hash: '5d2781d78fa5a97b7bafa849fe933dfc9dc93eba', + rangesHash: '73061952d41ce54825e2fc1c36b4cf736d5fb62f', + stringLength: 41, + }, 'binary.png': { hash: 'c6654ea913979e13e22022653d284444f284a172', byteLength: 41, @@ -94,7 +120,7 @@ describe('SnapshotManager', function () { }) }) - describe('of a text file', function () { + describe('of a text file with no tracked changes', function () { beforeEach(function (done) { this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({ getString: BPromise.promisify( @@ -178,6 +204,56 @@ Seven eight nine\ }) }) + describe('of a text file with tracked changes', function () { + beforeEach(function (done) { + this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({ + getString: (this.getString = sinon + .stub() + .resolves('the quick brown fox jumps over the lazy dog')), + getObject: (this.getObject = sinon.stub().resolves(this.ranges)), + }) + this.SnapshotManager.getFileSnapshotStream( + this.projectId, + 5, + 'file_with_ranges.tex', + (error, stream) => { + this.stream = stream + done(error) + } + ) + }) + + it('should get the overleaf id', function () { + this.WebApiManager.getHistoryId + .calledWith(this.projectId) + .should.equal(true) + }) + + it('should get the chunk', function () { + this.HistoryStoreManager.getChunkAtVersion + .calledWith(this.projectId, this.historyId, 5) + .should.equal(true) + }) + + it('should get the blob of the starting snapshot', function () { + this.getString + .calledWith('5d2781d78fa5a97b7bafa849fe933dfc9dc93eba') + .should.equal(true) + }) + + it('should get the blob of the ranges', function () { + this.getObject + .calledWith('73061952d41ce54825e2fc1c36b4cf736d5fb62f') + .should.equal(true) + }) + + it('should return a string stream with the text content without the tracked deletes', function () { + expect(this.stream.read().toString()).to.equal( + 'the brown fox jumps over the lazy dog' + ) + }) + }) + describe('of a binary file', function () { beforeEach(function (done) { this.HistoryStoreManager.getProjectBlobStream @@ -245,6 +321,27 @@ Seven eight nine\ describe('getProjectSnapshot', function () { beforeEach(function () { this.WebApiManager.getHistoryId.yields(null, this.historyId) + this.ranges = { + comments: [], + trackedChanges: [ + { + range: { pos: 5, length: 6 }, + tracking: { + userId: 'user-1', + ts: '2024-01-01T00:00:00.000Z', + type: 'delete', + }, + }, + { + range: { pos: 12, length: 5 }, + tracking: { + userId: 'user-1', + ts: '2024-01-01T00:00:00.000Z', + type: 'insert', + }, + }, + ], + } return this.HistoryStoreManager.getChunkAtVersion.yields(null, { chunk: (this.chunk = { history: { @@ -258,6 +355,16 @@ Seven eight nine\ hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea', stringLength: 41, }, + 'with_ranges_unchanged.tex': { + hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea', + rangesHash: '2e59fe3dbd5310703f89236d589d0b35db169cdf', + stringLength: 41, + }, + 'with_ranges_changed.tex': { + hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea', + rangesHash: '2e59fe3dbd5310703f89236d589d0b35db169cdf', + stringLength: 41, + }, 'binary.png': { hash: 'c6654ea913979e13e22022653d284444f284a172', byteLength: 41, @@ -285,6 +392,16 @@ Seven eight nine\ timestamp: '2017-12-04T10:29:22.905Z', authors: [31], }, + { + operations: [ + { + pathname: 'with_ranges_changed.tex', + textOperation: [41, '\n\nSeven eight'], + }, + ], + timestamp: '2017-12-04T10:29:25.905Z', + authors: [31], + }, ], }, startVersion: 3, @@ -302,22 +419,20 @@ Seven eight nine\ describe('of project', function () { beforeEach(function (done) { this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({ - getString: BPromise.promisify( - (this.getString = sinon.stub().yields( - null, - `\ + getString: (this.getString = sinon.stub().resolves( + `\ Hello world One two three Four five six\ -`.replace(/^\t/g, '') - )) - ), +` + )), + getObject: (this.getObject = sinon.stub().resolves(this.ranges)), }) this.SnapshotManager.getProjectSnapshot( this.projectId, - 5, + 6, (error, data) => { this.data = data done(error) @@ -326,36 +441,61 @@ Four five six\ }) it('should get the overleaf id', function () { - return this.WebApiManager.getHistoryId + this.WebApiManager.getHistoryId .calledWith(this.projectId) .should.equal(true) }) it('should get the chunk', function () { - return this.HistoryStoreManager.getChunkAtVersion - .calledWith(this.projectId, this.historyId, 5) + this.HistoryStoreManager.getChunkAtVersion + .calledWith(this.projectId, this.historyId, 6) .should.equal(true) }) - return it('should produce the snapshot file data', function () { - expect(this.data).to.have.all.keys(['files', 'projectId']) - expect(this.data.projectId).to.equal('project-id-123') - expect(this.data.files['main.tex']).to.exist - expect(this.data.files['unchanged.tex']).to.exist - expect(this.data.files['binary.png']).to.exist - // files with operations in the chunk should return content only - expect(this.data.files['main.tex'].data.content).to.equal( - 'Hello world\n\nOne two three\n\nFour five six\n\nSeven eight nine' - ) - expect(this.data.files['main.tex'].data.hash).to.not.exist - // unchanged files in the chunk should return hash only - expect(this.data.files['unchanged.tex'].data.hash).to.equal( - '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea' - ) - expect(this.data.files['unchanged.tex'].data.content).to.not.exist - return expect(this.data.files['binary.png'].data.hash).to.equal( - 'c6654ea913979e13e22022653d284444f284a172' - ) + it('should get the ranges for the file with tracked changes', function () { + this.getObject.calledWith('2e59fe3dbd5310703f89236d589d0b35db169cdf') + }) + + it('should produce the snapshot file data', function () { + expect(this.data).to.deep.equal({ + files: { + 'main.tex': { + // files with operations in the chunk should return content only + data: { + content: + 'Hello world\n\nOne two three\n\nFour five six\n\nSeven eight nine', + }, + }, + 'unchanged.tex': { + // unchanged files in the chunk should return hash only + data: { + hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea', + }, + }, + 'with_ranges_changed.tex': { + // files in the chunk with tracked changes should return content + // without the tracked deletes + data: { + content: + 'Hello\n\nOne two three\n\nFour five six\n\nSeven eight', + }, + }, + 'with_ranges_unchanged.tex': { + // files in the chunk with tracked changes should return content + // without the tracked deletes, even if they are unchanged + data: { + content: 'Hello\n\nOne two three\n\nFour five six', + }, + }, + 'binary.png': { + // binary files in the chunk should return hash only + data: { + hash: 'c6654ea913979e13e22022653d284444f284a172', + }, + }, + }, + projectId: 'project-id-123', + }) }) })