[project-history] add resync for project structure only (#23271)

* [project-history] add resync for project structure only

* [project-history] block resyncProjectStructureOnly that updates docs

* [project-history] tweak test description

GitOrigin-RevId: ce2749566c36e04dab21c26dd60dd75d93c0d4c0
This commit is contained in:
Jakob Ackermann 2025-01-31 14:06:54 +00:00 committed by Copybot
parent 8df7f6772c
commit b0c9176634
14 changed files with 410 additions and 25 deletions

View file

@ -106,10 +106,12 @@ const HistoryManager = {
projectHistoryId, projectHistoryId,
docs, docs,
files, files,
opts,
function (error) { function (error) {
if (error) { if (error) {
return callback(error) return callback(error)
} }
if (opts.resyncProjectStructureOnly) return callback()
const DocumentManager = require('./DocumentManager') const DocumentManager = require('./DocumentManager')
const resyncDoc = (doc, cb) => { const resyncDoc = (doc, cb) => {
DocumentManager.resyncDocContentsWithLock( DocumentManager.resyncDocContentsWithLock(

View file

@ -433,7 +433,13 @@ function updateProject(req, res, next) {
function resyncProjectHistory(req, res, next) { function resyncProjectHistory(req, res, next) {
const projectId = req.params.project_id const projectId = req.params.project_id
const { projectHistoryId, docs, files, historyRangesMigration } = req.body const {
projectHistoryId,
docs,
files,
historyRangesMigration,
resyncProjectStructureOnly,
} = req.body
logger.debug( logger.debug(
{ projectId, docs, files }, { projectId, docs, files },
@ -444,6 +450,9 @@ function resyncProjectHistory(req, res, next) {
if (historyRangesMigration) { if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration opts.historyRangesMigration = historyRangesMigration
} }
if (resyncProjectStructureOnly) {
opts.resyncProjectStructureOnly = resyncProjectStructureOnly
}
HistoryManager.resyncProjectHistory( HistoryManager.resyncProjectHistory(
projectId, projectId,

View file

@ -152,7 +152,13 @@ const ProjectHistoryRedisManager = {
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate) return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
}, },
async queueResyncProjectStructure(projectId, projectHistoryId, docs, files) { async queueResyncProjectStructure(
projectId,
projectHistoryId,
docs,
files,
opts
) {
logger.debug({ projectId, docs, files }, 'queue project structure resync') logger.debug({ projectId, docs, files }, 'queue project structure resync')
const projectUpdate = { const projectUpdate = {
resyncProjectStructure: { docs, files }, resyncProjectStructure: { docs, files },
@ -161,6 +167,9 @@ const ProjectHistoryRedisManager = {
ts: new Date(), ts: new Date(),
}, },
} }
if (opts.resyncProjectStructureOnly) {
projectUpdate.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
const jsonUpdate = JSON.stringify(projectUpdate) const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate) return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
}, },

View file

@ -217,34 +217,75 @@ describe('HistoryManager', function () {
.stub() .stub()
.yields() .yields()
this.DocumentManager.resyncDocContentsWithLock = sinon.stub().yields() this.DocumentManager.resyncDocContentsWithLock = sinon.stub().yields()
this.HistoryManager.resyncProjectHistory(
this.project_id,
this.projectHistoryId,
this.docs,
this.files,
this.callback
)
}) })
it('should queue a project structure reync', function () { describe('full sync', function () {
this.ProjectHistoryRedisManager.queueResyncProjectStructure beforeEach(function () {
.calledWith( this.HistoryManager.resyncProjectHistory(
this.project_id, this.project_id,
this.projectHistoryId, this.projectHistoryId,
this.docs, this.docs,
this.files this.files,
{},
this.callback
) )
.should.equal(true) })
it('should queue a project structure reync', function () {
this.ProjectHistoryRedisManager.queueResyncProjectStructure
.calledWith(
this.project_id,
this.projectHistoryId,
this.docs,
this.files
)
.should.equal(true)
})
it('should queue doc content reyncs', function () {
this.DocumentManager.resyncDocContentsWithLock
.calledWith(this.project_id, this.docs[0].doc, this.docs[0].path)
.should.equal(true)
})
it('should call the callback', function () {
this.callback.called.should.equal(true)
})
}) })
it('should queue doc content reyncs', function () { describe('resyncProjectStructureOnly=true', function () {
this.DocumentManager.resyncDocContentsWithLock beforeEach(function () {
.calledWith(this.project_id, this.docs[0].doc, this.docs[0].path) this.HistoryManager.resyncProjectHistory(
.should.equal(true) this.project_id,
}) this.projectHistoryId,
this.docs,
this.files,
{ resyncProjectStructureOnly: true },
this.callback
)
})
it('should call the callback', function () { it('should queue a project structure reync', function () {
this.callback.called.should.equal(true) this.ProjectHistoryRedisManager.queueResyncProjectStructure
.calledWith(
this.project_id,
this.projectHistoryId,
this.docs,
this.files,
{ resyncProjectStructureOnly: true }
)
.should.equal(true)
})
it('should not queue doc content reyncs', function () {
this.DocumentManager.resyncDocContentsWithLock.called.should.equal(
false
)
})
it('should call the callback', function () {
this.callback.called.should.equal(true)
})
}) })
}) })
}) })

View file

@ -8,3 +8,4 @@ export class InconsistentChunkError extends OError {}
export class UpdateWithUnknownFormatError extends OError {} export class UpdateWithUnknownFormatError extends OError {}
export class UnexpectedOpTypeError extends OError {} export class UnexpectedOpTypeError extends OError {}
export class TooManyRequestsError extends OError {} export class TooManyRequestsError extends OError {}
export class NeedFullProjectStructureResyncError extends OError {}

View file

@ -136,6 +136,9 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
moreBatches = true moreBatches = true
break break
} }
if (update.resyncProjectStructureOnly) {
update._raw = rawUpdate
}
rawUpdates.push(rawUpdate) rawUpdates.push(rawUpdate)
updates.push(update) updates.push(update)
@ -151,6 +154,26 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
} }
} }
/**
* @param {string} projectId
* @param {ResyncProjectStructureUpdate} update
* @return {Promise<void>}
*/
async function deleteAppliedDocUpdate(projectId, update) {
const raw = update._raw
// Delete the first occurrence of the update with LREM KEY COUNT
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
// value moving from head to tail.'
//
// If COUNT is 0 the entire list would be searched which would block
// redis since it would be an O(N) operation where N is the length of
// the queue, in a multi of the batch size.
metrics.summary('redis.projectHistoryOps', raw.length, {
status: 'lrem',
})
await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
}
async function deleteAppliedDocUpdates(projectId, updates) { async function deleteAppliedDocUpdates(projectId, updates) {
const multi = rclient.multi() const multi = rclient.multi()
// Delete all the updates which have been applied (exact match) // Delete all the updates which have been applied (exact match)
@ -160,7 +183,7 @@ async function deleteAppliedDocUpdates(projectId, updates) {
// value moving from head to tail.' // value moving from head to tail.'
// //
// If COUNT is 0 the entire list would be searched which would block // If COUNT is 0 the entire list would be searched which would block
// redis snce it would be an O(N) operation where N is the length of // redis since it would be an O(N) operation where N is the length of
// the queue, in a multi of the batch size. // the queue, in a multi of the batch size.
metrics.summary('redis.projectHistoryOps', update.length, { metrics.summary('redis.projectHistoryOps', update.length, {
status: 'lrem', status: 'lrem',
@ -383,6 +406,7 @@ export const promises = {
countUnprocessedUpdates, countUnprocessedUpdates,
getRawUpdatesBatch, getRawUpdatesBatch,
deleteAppliedDocUpdates, deleteAppliedDocUpdates,
deleteAppliedDocUpdate,
destroyDocUpdatesQueue, destroyDocUpdatesQueue,
getUpdatesInBatches, getUpdatesInBatches,
getProjectIdsWithHistoryOps, getProjectIdsWithHistoryOps,

View file

@ -8,7 +8,7 @@ import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics' import Metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error' import OError from '@overleaf/o-error'
import { File, Range } from 'overleaf-editor-core' import { File, Range } from 'overleaf-editor-core'
import { SyncError } from './Errors.js' import { NeedFullProjectStructureResyncError, SyncError } from './Errors.js'
import { db, ObjectId } from './mongodb.js' import { db, ObjectId } from './mongodb.js'
import * as SnapshotManager from './SnapshotManager.js' import * as SnapshotManager from './SnapshotManager.js'
import * as LockManager from './LockManager.js' import * as LockManager from './LockManager.js'
@ -100,6 +100,9 @@ async function _startResyncWithoutLock(projectId, options) {
if (options.historyRangesMigration) { if (options.historyRangesMigration) {
webOpts.historyRangesMigration = options.historyRangesMigration webOpts.historyRangesMigration = options.historyRangesMigration
} }
if (options.resyncProjectStructureOnly) {
webOpts.resyncProjectStructureOnly = options.resyncProjectStructureOnly
}
await WebApiManager.promises.requestResync(projectId, webOpts) await WebApiManager.promises.requestResync(projectId, webOpts)
await setResyncState(projectId, syncState) await setResyncState(projectId, syncState)
} }
@ -281,8 +284,10 @@ class SyncState {
}) })
} }
for (const doc of update.resyncProjectStructure.docs) { if (!update.resyncProjectStructureOnly) {
this.startDocContentSync(doc.path) for (const doc of update.resyncProjectStructure.docs) {
this.startDocContentSync(doc.path)
}
} }
this.stopProjectStructureSync() this.stopProjectStructureSync()
@ -475,6 +480,28 @@ class SyncUpdateExpander {
persistedBinaryFiles persistedBinaryFiles
) )
this.queueSetMetadataOpsForLinkedFiles(update) this.queueSetMetadataOpsForLinkedFiles(update)
if (update.resyncProjectStructureOnly) {
const docPaths = new Set()
for (const entity of update.resyncProjectStructure.docs) {
const path = UpdateTranslator._convertPathname(entity.path)
docPaths.add(path)
}
for (const expandedUpdate of this.expandedUpdates) {
if (docPaths.has(expandedUpdate.pathname)) {
// Clear the resync state and queue entry, we need to start over.
this.expandedUpdates = []
await clearResyncState(this.projectId)
await RedisManager.promises.deleteAppliedDocUpdate(
this.projectId,
update
)
throw new NeedFullProjectStructureResyncError(
'aborting partial resync: touched doc'
)
}
}
}
} else if ('resyncDocContent' in update) { } else if ('resyncDocContent' in update) {
logger.debug( logger.debug(
{ projectId: this.projectId, update }, { projectId: this.projectId, update },

View file

@ -39,6 +39,9 @@ async function requestResync(projectId, opts = {}) {
if (opts.historyRangesMigration) { if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration body.historyRangesMigration = opts.historyRangesMigration
} }
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
await fetchNothing( await fetchNothing(
`${Settings.apis.web.url}/project/${projectId}/history/resync`, `${Settings.apis.web.url}/project/${projectId}/history/resync`,
{ {

View file

@ -94,6 +94,9 @@ export type ResyncProjectStructureUpdate = {
meta: { meta: {
ts: string ts: string
} }
// optional fields for resyncProjectStructureOnly=true
resyncProjectStructureOnly?: boolean
_raw: string
} }
export type ResyncDocContentUpdate = { export type ResyncDocContentUpdate = {

View file

@ -9,6 +9,7 @@ import Settings from '@overleaf/settings'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js' import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js' import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
import sinon from 'sinon' import sinon from 'sinon'
import { getFailure } from './helpers/ProjectHistoryClient.js'
const { ObjectId } = mongodb const { ObjectId } = mongodb
const EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' const EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
@ -1315,6 +1316,228 @@ describe('Syncing with web and doc-updater', function () {
) )
}) })
}) })
describe('resyncProjectStructureOnly', function () {
it('should handle structure only updates', function (done) {
const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'main.tex': {
hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
stringLength: 3,
},
},
},
changes: [],
},
startVersion: 0,
},
})
const docContentRequest = MockHistoryStore()
.get(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(200, 'a\nb')
MockHistoryStore()
.head(`/api/projects/${historyId}/blobs/${fileHash}`)
.reply(200)
const addFile = MockHistoryStore()
.post(`/api/projects/${historyId}/legacy_changes`, body => {
expect(body).to.deep.equal([
{
v2Authors: [],
authors: [],
timestamp: this.timestamp.toJSON(),
operations: [
{
pathname: 'test.png',
file: {
hash: fileHash,
},
},
],
origin: { kind: 'test-origin' },
},
])
return true
})
.query({ end_version: 0 })
.reply(204)
// allow a 2nd resync
MockWeb()
.post(`/project/${this.project_id}/history/resync`)
.reply(204)
async.series(
[
cb => {
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
cb => {
const update = {
projectHistoryId: historyId,
resyncProjectStructureOnly: true,
resyncProjectStructure: {
docs: [{ path: '/main.tex' }],
files: [
{
file: this.file_id,
path: '/test.png',
_hash: fileHash,
createdBlob: true,
},
],
},
meta: {
ts: this.timestamp,
},
}
ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
},
cb => {
ProjectHistoryClient.flushProject(this.project_id, cb)
},
cb => {
// fails when previous resync did not finish
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
],
error => {
if (error) {
throw error
}
assert(
addFile.isDone(),
`/api/projects/${historyId}/changes should have been called`
)
assert(
!docContentRequest.isDone(),
'should not have requested doc content'
)
done()
}
)
})
it('should reject partial resync on docs', function (done) {
const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'main.tex': {
hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
stringLength: 3,
},
},
},
changes: [],
},
startVersion: 0,
},
})
const docContentRequest = MockHistoryStore()
.get(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(200, 'a\nb')
MockHistoryStore()
.head(`/api/projects/${historyId}/blobs/${fileHash}`)
.reply(200)
const addFile = MockHistoryStore()
.post(`/api/projects/${historyId}/legacy_changes`)
.query({ end_version: 0 })
.reply(204)
// allow a 2nd resync
MockWeb()
.post(`/project/${this.project_id}/history/resync`)
.reply(204)
async.series(
[
cb => {
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
cb => {
const update = {
projectHistoryId: historyId,
resyncProjectStructureOnly: true,
resyncProjectStructure: {
docs: [{ path: '/main-renamed.tex' }],
files: [
{
file: this.file_id,
path: '/test.png',
_hash: fileHash,
createdBlob: true,
},
],
},
meta: {
ts: this.timestamp,
},
}
ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
},
cb => {
ProjectHistoryClient.flushProject(
this.project_id,
{ allowErrors: true },
(err, res) => {
if (err) return cb(err)
expect(res.statusCode).to.equal(500)
expect(loggerError).to.have.been.calledWith(
sinon.match({
err: {
name: 'NeedFullProjectStructureResyncError',
message: 'aborting partial resync: touched doc',
},
})
)
getFailure(this.project_id, (err, failure) => {
if (err) return cb(err)
expect(failure).to.include({
error:
'NeedFullProjectStructureResyncError: aborting partial resync: touched doc',
})
cb()
})
}
)
},
cb => {
// fails when previous resync did not finish
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
],
error => {
if (error) {
throw error
}
assert(!addFile.isDone(), 'should not have persisted changes')
assert(
!docContentRequest.isDone(),
'should not have requested doc content'
)
done()
}
)
})
})
}) })
}) })
}) })

View file

@ -330,6 +330,10 @@ export function setFailure(failureEntry, callback) {
) )
} }
export function getFailure(projectId, callback) {
db.projectHistoryFailures.findOne({ project_id: projectId }, callback)
}
export function transferLabelOwnership(fromUser, toUser, callback) { export function transferLabelOwnership(fromUser, toUser, callback) {
request.post( request.post(
{ {

View file

@ -411,6 +411,39 @@ describe('SyncManager', function () {
}) })
}) })
it('records docs to resync when resyncProjectStructureOnly=true is not set', async function () {
const updates = [this.projectStructureSyncUpdate]
const { updates: filteredUpdates, syncState } =
await this.SyncManager.promises.skipUpdatesDuringSync(
this.projectId,
updates
)
expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
expect(syncState.toRaw()).to.deep.equal({
resyncProjectStructure: false,
resyncDocContents: ['new.tex'],
origin: { kind: 'history-resync' },
})
})
it('records no docs to resync with resyncProjectStructureOnly=true', async function () {
this.projectStructureSyncUpdate.resyncProjectStructureOnly = true
const updates = [this.projectStructureSyncUpdate]
const { updates: filteredUpdates, syncState } =
await this.SyncManager.promises.skipUpdatesDuringSync(
this.projectId,
updates
)
expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
expect(syncState.toRaw()).to.deep.equal({
resyncProjectStructure: false,
resyncDocContents: [],
origin: { kind: 'history-resync' },
})
})
it('allow project structure updates after project structure sync update', async function () { it('allow project structure updates after project structure sync update', async function () {
const updates = [this.projectStructureSyncUpdate, this.renameUpdate] const updates = [this.projectStructureSyncUpdate, this.renameUpdate]
const { updates: filteredUpdates, syncState } = const { updates: filteredUpdates, syncState } =

View file

@ -302,6 +302,9 @@ function resyncProjectHistory(
if (opts.historyRangesMigration) { if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration body.historyRangesMigration = opts.historyRangesMigration
} }
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
_makeRequest( _makeRequest(
{ {
path: `/project/${projectId}/history/resync`, path: `/project/${projectId}/history/resync`,

View file

@ -148,6 +148,9 @@ module.exports = HistoryController = {
if (historyRangesMigration) { if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration opts.historyRangesMigration = historyRangesMigration
} }
if (req.body.resyncProjectStructureOnly) {
opts.resyncProjectStructureOnly = req.body.resyncProjectStructureOnly
}
ProjectEntityUpdateHandler.resyncProjectHistory( ProjectEntityUpdateHandler.resyncProjectHistory(
projectId, projectId,
opts, opts,