Merge pull request #17396 from overleaf/mj-version-filtering

[overleaf-editor-core+project-history] Filter tracked changes when fetching files

GitOrigin-RevId: 935e4c4712f31b77070aec545a849fc6fefedcd9
This commit is contained in:
Mathias Jakobsen 2024-04-05 11:27:00 +01:00 committed by Copybot
parent 178485715f
commit 815c29cf82
9 changed files with 235 additions and 53 deletions
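
In effect, project-history now strips tracked deletions out of file content before serving it, both for single-file snapshot streams and for whole-project snapshots. A minimal sketch of that filtering, assuming the range/tracking shape used in the test fixtures below; this is illustrative only, not the overleaf-editor-core implementation:

    // Illustrative sketch: remove the spans covered by tracked deletes.
    // `trackedChanges` entries follow the test fixtures below:
    // { range: { pos, length }, tracking: { type: 'delete' | 'insert', ... } }
    function filterTrackedDeletes(content, trackedChanges) {
      const deletes = trackedChanges
        .filter(change => change.tracking.type === 'delete')
        .sort((a, b) => b.range.pos - a.range.pos) // right to left, so earlier positions stay valid
      let result = content
      for (const { range } of deletes) {
        result = result.slice(0, range.pos) + result.slice(range.pos + range.length)
      }
      return result
    }

    // A tracked delete of length 6 at position 4 turns
    // 'the quick brown fox jumps over the lazy dog' into
    // 'the brown fox jumps over the lazy dog' (see the getFileSnapshotStream test).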

View file

@@ -1,3 +1,4 @@
+// @ts-check
 'use strict'

 const _ = require('lodash')
@@ -11,6 +12,7 @@ const StringFileData = require('./file_data/string_file_data')
 /**
  * @typedef {import("./blob")} Blob
  * @typedef {import("./types").BlobStore} BlobStore
+ * @typedef {import("./types").ReadonlyBlobStore} ReadonlyBlobStore
  * @typedef {import("./types").StringFileRawData} StringFileRawData
  * @typedef {import("./types").CommentRawData} CommentRawData
  * @typedef {import("./operation/text_operation")} TextOperation
@@ -97,7 +99,7 @@ class File {
   /**
    * @param {Blob} blob
-   * @param {Blob} [blob]
+   * @param {Blob} [rangesBlob]
    * @param {Object} [metadata]
    * @return {File}
    */
@@ -213,7 +215,7 @@ class File {
    * @return {File} a new object of the same type
    */
   clone() {
-    return File.fromRaw(this.toRaw())
+    return /** @type {File} */ (File.fromRaw(this.toRaw()))
   }

   /**
@@ -222,7 +224,7 @@ class File {
    * operation.
    *
    * @param {string} kind
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise.<File>} for this
    */
   async load(kind, blobStore) {

View file

@@ -15,6 +15,7 @@ let StringFileData = null
 /**
  * @typedef {import("../types").BlobStore} BlobStore
+ * @typedef {import("../types").ReadonlyBlobStore} ReadonlyBlobStore
  * @typedef {import("../operation/edit_operation")} EditOperation
  * @typedef {import("../types").CommentRawData} CommentRawData
  */
@@ -131,7 +132,7 @@ class FileData {
   /**
    * @function
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise<FileData>}
    * @abstract
    * @see FileData#load
@@ -142,7 +143,7 @@ class FileData {
   /**
    * @function
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise<FileData>}
    * @abstract
    * @see FileData#load
@@ -153,7 +154,7 @@ class FileData {
   /**
    * @function
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise<FileData>}
    * @abstract
    * @see FileData#load
@@ -165,7 +166,7 @@ class FileData {
   /**
    * @see File#load
    * @param {string} kind
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise<FileData>}
    */
   async load(kind, blobStore) {

View file

@@ -12,6 +12,7 @@ const EditOperationBuilder = require('../operation/edit_operation_builder')
 /**
  * @typedef {import('../types').BlobStore} BlobStore
+ * @typedef {import('../types').ReadonlyBlobStore} ReadonlyBlobStore
  * @typedef {import('../types').RangesBlob} RangesBlob
  */
@@ -106,7 +107,7 @@ class LazyStringFileData extends FileData {
   /**
    * @inheritdoc
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @returns {Promise<EagerStringFileData>}
    */
   async toEager(blobStore) {

View file

@@ -1,3 +1,4 @@
+// @ts-check
 'use strict'

 const _ = require('lodash')
@@ -64,11 +65,12 @@ class FileMap {
   static FileNotFoundError = FileNotFoundError

   /**
-   * @param {Object.<String, File>} files
+   * @param {Record<String, File | null>} files
    */
   constructor(files) {
     // create bare object for use as Map
     // http://ryanmorr.com/true-hash-maps-in-javascript/
+    /** @type {Record<String, File | null>} */
     this.files = Object.create(null)
     _.assign(this.files, files)
     checkPathnamesAreUnique(this.files)
@@ -221,8 +223,9 @@ class FileMap {
   /**
    * Map the files in this map to new values.
-   * @param {function} iteratee
-   * @return {Object}
+   * @template T
+   * @param {(file: File | null) => T} iteratee
+   * @return {Record<String, T>}
    */
   map(iteratee) {
     return _.mapValues(this.files, iteratee)

View file

@@ -10,6 +10,7 @@ const FILE_LOAD_CONCURRENCY = 50
 /**
  * @typedef {import("./types").BlobStore} BlobStore
+ * @typedef {import("./types").ReadonlyBlobStore} ReadonlyBlobStore
  * @typedef {import("./change")} Change
  * @typedef {import("./operation/text_operation")} TextOperation
  */
@@ -167,7 +168,7 @@ class Snapshot {
    * Ignore recoverable errors (caused by historical bad data) unless opts.strict is true
    *
    * @param {Change[]} changes
-   * @param {object} opts
+   * @param {object} [opts]
    * @param {boolean} opts.strict - do not ignore recoverable errors
    */
   applyAll(changes, opts) {
@@ -196,7 +197,7 @@ class Snapshot {
    * Load all of the files in this snapshot.
    *
    * @param {string} kind see {File#load}
-   * @param {BlobStore} blobStore
+   * @param {ReadonlyBlobStore} blobStore
    * @return {Promise<Object>} an object where keys are the pathnames and
    *         values are the files in the snapshot
    */

View file

@@ -8,6 +8,8 @@ export type BlobStore = {
   getObject<T = unknown>(hash: string): Promise<T>
 }

+export type ReadonlyBlobStore = Pick<BlobStore, 'getString' | 'getObject'>
+
 export type RangesBlob = {
   comments: CommentsListRawData
   trackedChanges: TrackedChangeRawData[]

View file

@@ -361,8 +361,13 @@ class BlobStore {
     this.projectId = projectId
   }

-  getString(hash) {
-    return getProjectBlobAsync(this.projectId, hash)
+  async getString(hash) {
+    return await getProjectBlobAsync(this.projectId, hash)
+  }
+
+  async getObject(hash) {
+    const string = await this.getString(hash)
+    return JSON.parse(string)
   }
 }
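
A ranges blob is JSON with the shape declared in types.ts ({ comments, trackedChanges }), so the new getObject method is what lets project-history resolve a file's rangesHash. A hedged usage sketch (the helper name is made up; getBlobStore and getRangesHash appear elsewhere in this diff):

    // Sketch: resolve a file's tracked-change ranges via the new getObject.
    async function loadRanges(historyId, file) {
      const blobStore = HistoryStoreManager.getBlobStore(historyId)
      // getObject is getString(hash) followed by JSON.parse
      return await blobStore.getObject(file.getRangesHash())
    }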

View file

@@ -1,3 +1,4 @@
+// @ts-check
 import Core from 'overleaf-editor-core'
 import { Readable as StringStream } from 'stream'
 import BPromise from 'bluebird'
@@ -6,10 +7,19 @@ import * as HistoryStoreManager from './HistoryStoreManager.js'
 import * as WebApiManager from './WebApiManager.js'
 import * as Errors from './Errors.js'

+/** @typedef {import('overleaf-editor-core').Snapshot} Snapshot */
+
 StringStream.prototype._read = function () {}

 const MAX_REQUESTS = 4 // maximum number of parallel requests to v1 history service

+/**
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @param {string} pathname
+ * @param {Function} callback
+ */
 export function getFileSnapshotStream(projectId, version, pathname, callback) {
   _getSnapshotAtVersion(projectId, version, (error, snapshot) => {
     if (error) {
@@ -34,7 +44,7 @@ export function getFileSnapshotStream(projectId, version, pathname, callback) {
       .load('eager', HistoryStoreManager.getBlobStore(historyId))
       .then(() => {
         const stream = new StringStream()
-        stream.push(file.getContent())
+        stream.push(file.getContent({ filterTrackedDeletes: true }))
         stream.push(null)
         callback(null, stream)
       })
@@ -70,7 +80,18 @@ export function getProjectSnapshot(projectId, version, callback) {
       .then(() => {
         const data = {
           projectId,
-          files: snapshot.getFileMap().files,
+          files: snapshot.getFileMap().map(file => {
+            if (!file) {
+              return null
+            }
+            const content = file.getContent({
+              filterTrackedDeletes: true,
+            })
+            if (content === null) {
+              return { data: { hash: file.getHash() } }
+            }
+            return { data: { content } }
+          }),
         }
         callback(null, data)
       })
@@ -79,6 +100,12 @@
     })
 }

+/**
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @param {Function} callback
+ */
 function _getSnapshotAtVersion(projectId, version, callback) {
   WebApiManager.getHistoryId(projectId, (error, historyId) => {
     if (error) {
@@ -133,10 +160,10 @@ function _loadFilesLimit(snapshot, kind, blobStore) {
   return BPromise.map(
     fileList,
     file => {
-      // only load changed files, others can be dereferenced from their
-      // blobs (this method is only used by the git bridge which
-      // understands how to load blobs).
-      if (!file.isEditable() || file.getHash()) {
+      // only load changed files or files with tracked changes, others can be
+      // dereferenced from their blobs (this method is only used by the git
+      // bridge which understands how to load blobs).
+      if (!file.isEditable() || (file.getHash() && !file.getRangesHash())) {
         return
       }
       return file.load(kind, blobStore)
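
With the getProjectSnapshot change above, each entry in files is either null, { data: { content } } (content already filtered of tracked deletes), or { data: { hash } } for unchanged and binary files. A consumer such as the git bridge might resolve an entry roughly like this (hypothetical helper, not part of this commit):

    // Hypothetical consumer sketch: prefer inline content, otherwise
    // dereference the blob by its hash.
    async function materialiseFile(entry, blobStore) {
      if (!entry) return null // placeholder for non-editable entries
      if (entry.data.content !== undefined) return entry.data.content
      return await blobStore.getString(entry.data.hash)
    }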

View file

@@ -44,7 +44,28 @@ describe('SnapshotManager', function () {
   describe('getFileSnapshotStream', function () {
     beforeEach(function () {
       this.WebApiManager.getHistoryId.yields(null, this.historyId)
-      return this.HistoryStoreManager.getChunkAtVersion.yields(null, {
+      this.ranges = {
+        comments: [],
+        trackedChanges: [
+          {
+            range: { pos: 4, length: 6 },
+            tracking: {
+              userId: 'user-1',
+              ts: '2024-01-01T00:00:00.000Z',
+              type: 'delete',
+            },
+          },
+          {
+            range: { pos: 35, length: 5 },
+            tracking: {
+              userId: 'user-1',
+              ts: '2024-01-01T00:00:00.000Z',
+              type: 'insert',
+            },
+          },
+        ],
+      }
+      this.HistoryStoreManager.getChunkAtVersion.yields(null, {
         chunk: {
           history: {
             snapshot: {
@@ -53,6 +74,11 @@ describe('SnapshotManager', function () {
               hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea',
               stringLength: 41,
             },
+            'file_with_ranges.tex': {
+              hash: '5d2781d78fa5a97b7bafa849fe933dfc9dc93eba',
+              rangesHash: '73061952d41ce54825e2fc1c36b4cf736d5fb62f',
+              stringLength: 41,
+            },
             'binary.png': {
               hash: 'c6654ea913979e13e22022653d284444f284a172',
               byteLength: 41,
@@ -94,7 +120,7 @@
       })
     })

-    describe('of a text file', function () {
+    describe('of a text file with no tracked changes', function () {
       beforeEach(function (done) {
         this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({
           getString: BPromise.promisify(
@@ -178,6 +204,56 @@ Seven eight nine\
       })
     })

+    describe('of a text file with tracked changes', function () {
+      beforeEach(function (done) {
+        this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({
+          getString: (this.getString = sinon
+            .stub()
+            .resolves('the quick brown fox jumps over the lazy dog')),
+          getObject: (this.getObject = sinon.stub().resolves(this.ranges)),
+        })
+        this.SnapshotManager.getFileSnapshotStream(
+          this.projectId,
+          5,
+          'file_with_ranges.tex',
+          (error, stream) => {
+            this.stream = stream
+            done(error)
+          }
+        )
+      })
+
+      it('should get the overleaf id', function () {
+        this.WebApiManager.getHistoryId
+          .calledWith(this.projectId)
+          .should.equal(true)
+      })
+
+      it('should get the chunk', function () {
+        this.HistoryStoreManager.getChunkAtVersion
+          .calledWith(this.projectId, this.historyId, 5)
+          .should.equal(true)
+      })
+
+      it('should get the blob of the starting snapshot', function () {
+        this.getString
+          .calledWith('5d2781d78fa5a97b7bafa849fe933dfc9dc93eba')
+          .should.equal(true)
+      })
+
+      it('should get the blob of the ranges', function () {
+        this.getObject
+          .calledWith('73061952d41ce54825e2fc1c36b4cf736d5fb62f')
+          .should.equal(true)
+      })
+
+      it('should return a string stream with the text content without the tracked deletes', function () {
+        expect(this.stream.read().toString()).to.equal(
+          'the brown fox jumps over the lazy dog'
+        )
+      })
+    })
+
     describe('of a binary file', function () {
       beforeEach(function (done) {
         this.HistoryStoreManager.getProjectBlobStream
@@ -245,6 +321,27 @@ Seven eight nine\
   describe('getProjectSnapshot', function () {
     beforeEach(function () {
       this.WebApiManager.getHistoryId.yields(null, this.historyId)
+      this.ranges = {
+        comments: [],
+        trackedChanges: [
+          {
+            range: { pos: 5, length: 6 },
+            tracking: {
+              userId: 'user-1',
+              ts: '2024-01-01T00:00:00.000Z',
+              type: 'delete',
+            },
+          },
+          {
+            range: { pos: 12, length: 5 },
+            tracking: {
+              userId: 'user-1',
+              ts: '2024-01-01T00:00:00.000Z',
+              type: 'insert',
+            },
+          },
+        ],
+      }
       return this.HistoryStoreManager.getChunkAtVersion.yields(null, {
         chunk: (this.chunk = {
           history: {
@@ -258,6 +355,16 @@ Seven eight nine\
               hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea',
               stringLength: 41,
             },
+            'with_ranges_unchanged.tex': {
+              hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea',
+              rangesHash: '2e59fe3dbd5310703f89236d589d0b35db169cdf',
+              stringLength: 41,
+            },
+            'with_ranges_changed.tex': {
+              hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea',
+              rangesHash: '2e59fe3dbd5310703f89236d589d0b35db169cdf',
+              stringLength: 41,
+            },
             'binary.png': {
               hash: 'c6654ea913979e13e22022653d284444f284a172',
               byteLength: 41,
@@ -285,6 +392,16 @@ Seven eight nine\
               timestamp: '2017-12-04T10:29:22.905Z',
               authors: [31],
             },
+            {
+              operations: [
+                {
+                  pathname: 'with_ranges_changed.tex',
+                  textOperation: [41, '\n\nSeven eight'],
+                },
+              ],
+              timestamp: '2017-12-04T10:29:25.905Z',
+              authors: [31],
+            },
           ],
         },
         startVersion: 3,
@@ -302,22 +419,20 @@ Seven eight nine\
     describe('of project', function () {
       beforeEach(function (done) {
         this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({
-          getString: BPromise.promisify(
-            (this.getString = sinon.stub().yields(
-              null,
+          getString: (this.getString = sinon.stub().resolves(
             `\
 Hello world

 One two three

 Four five six\
-`.replace(/^\t/g, '')
-            ))
-          ),
+`
+          )),
+          getObject: (this.getObject = sinon.stub().resolves(this.ranges)),
         })
         this.SnapshotManager.getProjectSnapshot(
           this.projectId,
-          5,
+          6,
           (error, data) => {
             this.data = data
             done(error)
@@ -326,36 +441,61 @@ Four five six\
       })

       it('should get the overleaf id', function () {
-        return this.WebApiManager.getHistoryId
+        this.WebApiManager.getHistoryId
           .calledWith(this.projectId)
           .should.equal(true)
       })

       it('should get the chunk', function () {
-        return this.HistoryStoreManager.getChunkAtVersion
-          .calledWith(this.projectId, this.historyId, 5)
+        this.HistoryStoreManager.getChunkAtVersion
+          .calledWith(this.projectId, this.historyId, 6)
           .should.equal(true)
       })

-      return it('should produce the snapshot file data', function () {
-        expect(this.data).to.have.all.keys(['files', 'projectId'])
-        expect(this.data.projectId).to.equal('project-id-123')
-        expect(this.data.files['main.tex']).to.exist
-        expect(this.data.files['unchanged.tex']).to.exist
-        expect(this.data.files['binary.png']).to.exist
-        // files with operations in the chunk should return content only
-        expect(this.data.files['main.tex'].data.content).to.equal(
-          'Hello world\n\nOne two three\n\nFour five six\n\nSeven eight nine'
-        )
-        expect(this.data.files['main.tex'].data.hash).to.not.exist
-        // unchanged files in the chunk should return hash only
-        expect(this.data.files['unchanged.tex'].data.hash).to.equal(
-          '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea'
-        )
-        expect(this.data.files['unchanged.tex'].data.content).to.not.exist
-        return expect(this.data.files['binary.png'].data.hash).to.equal(
-          'c6654ea913979e13e22022653d284444f284a172'
-        )
+      it('should get the ranges for the file with tracked changes', function () {
+        this.getObject.calledWith('2e59fe3dbd5310703f89236d589d0b35db169cdf')
+      })
+
+      it('should produce the snapshot file data', function () {
+        expect(this.data).to.deep.equal({
+          files: {
+            'main.tex': {
+              // files with operations in the chunk should return content only
+              data: {
+                content:
+                  'Hello world\n\nOne two three\n\nFour five six\n\nSeven eight nine',
+              },
+            },
+            'unchanged.tex': {
+              // unchanged files in the chunk should return hash only
+              data: {
+                hash: '35c9bd86574d61dcadbce2fdd3d4a0684272c6ea',
+              },
+            },
+            'with_ranges_changed.tex': {
+              // files in the chunk with tracked changes should return content
+              // without the tracked deletes
+              data: {
+                content:
+                  'Hello\n\nOne two three\n\nFour five six\n\nSeven eight',
+              },
+            },
+            'with_ranges_unchanged.tex': {
+              // files in the chunk with tracked changes should return content
+              // without the tracked deletes, even if they are unchanged
+              data: {
+                content: 'Hello\n\nOne two three\n\nFour five six',
+              },
+            },
+            'binary.png': {
+              // binary files in the chunk should return hash only
+              data: {
+                hash: 'c6654ea913979e13e22022653d284444f284a172',
+              },
+            },
+          },
+          projectId: 'project-id-123',
+        })
       })
     })