[project-history] add resync for project structure only (#23271)

* [project-history] add resync for project structure only

* [project-history] block resyncProjectStructureOnly resyncs that update docs

* [project-history] tweak test description

GitOrigin-RevId: ce2749566c36e04dab21c26dd60dd75d93c0d4c0
Jakob Ackermann 2025-01-31 14:06:54 +00:00 committed by Copybot
parent 8df7f6772c
commit b0c9176634
14 changed files with 410 additions and 25 deletions
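
The new flag is threaded through three services with the same copy-if-set pattern: web's HistoryController accepts resyncProjectStructureOnly on the resync endpoint, document-updater's HttpController copies it into opts and forwards it to project-history via ProjectHistoryRedisManager, and project-history's SyncManager enforces it. A minimal sketch of that forwarding pattern follows; the function name buildResyncBody is illustrative, only the two flag names are taken from this commit.

// Copy optional flags only when set, so the serialized body stays small
// and services that predate a flag never see the new field.
function buildResyncBody(opts = {}) {
  const body = {}
  if (opts.historyRangesMigration) {
    body.historyRangesMigration = opts.historyRangesMigration
  }
  if (opts.resyncProjectStructureOnly) {
    body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
  }
  return body
}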

View file

@@ -106,10 +106,12 @@ const HistoryManager = {
projectHistoryId,
docs,
files,
opts,
function (error) {
if (error) {
return callback(error)
}
if (opts.resyncProjectStructureOnly) return callback()
const DocumentManager = require('./DocumentManager')
const resyncDoc = (doc, cb) => {
DocumentManager.resyncDocContentsWithLock(

View file

@@ -433,7 +433,13 @@ function updateProject(req, res, next) {
function resyncProjectHistory(req, res, next) {
const projectId = req.params.project_id
const { projectHistoryId, docs, files, historyRangesMigration } = req.body
const {
projectHistoryId,
docs,
files,
historyRangesMigration,
resyncProjectStructureOnly,
} = req.body
logger.debug(
{ projectId, docs, files },
@@ -444,6 +450,9 @@ function resyncProjectHistory(req, res, next) {
if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration
}
if (resyncProjectStructureOnly) {
opts.resyncProjectStructureOnly = resyncProjectStructureOnly
}
HistoryManager.resyncProjectHistory(
projectId,

View file

@@ -152,7 +152,13 @@ const ProjectHistoryRedisManager = {
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
async queueResyncProjectStructure(projectId, projectHistoryId, docs, files) {
async queueResyncProjectStructure(
projectId,
projectHistoryId,
docs,
files,
opts
) {
logger.debug({ projectId, docs, files }, 'queue project structure resync')
const projectUpdate = {
resyncProjectStructure: { docs, files },
@@ -161,6 +167,9 @@ const ProjectHistoryRedisManager = {
ts: new Date(),
},
}
if (opts.resyncProjectStructureOnly) {
projectUpdate.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
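
For reference, a structure-only update queued by this function has roughly the following shape; the values are made up, and the field layout mirrors the acceptance-test update later in this commit.

const exampleQueuedUpdate = {
  projectHistoryId: 123,
  resyncProjectStructureOnly: true,
  resyncProjectStructure: {
    docs: [{ path: '/main.tex' }],
    files: [
      { file: 'file-id', path: '/test.png', _hash: 'aed2973e…', createdBlob: true },
    ],
  },
  meta: { ts: '2025-01-31T14:06:54.000Z' },
}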

View file

@@ -217,34 +217,75 @@ describe('HistoryManager', function () {
.stub()
.yields()
this.DocumentManager.resyncDocContentsWithLock = sinon.stub().yields()
this.HistoryManager.resyncProjectHistory(
this.project_id,
this.projectHistoryId,
this.docs,
this.files,
this.callback
)
})
it('should queue a project structure resync', function () {
this.ProjectHistoryRedisManager.queueResyncProjectStructure
.calledWith(
describe('full sync', function () {
beforeEach(function () {
this.HistoryManager.resyncProjectHistory(
this.project_id,
this.projectHistoryId,
this.docs,
this.files
this.files,
{},
this.callback
)
.should.equal(true)
})
it('should queue a project structure resync', function () {
this.ProjectHistoryRedisManager.queueResyncProjectStructure
.calledWith(
this.project_id,
this.projectHistoryId,
this.docs,
this.files
)
.should.equal(true)
})
it('should queue doc content resyncs', function () {
this.DocumentManager.resyncDocContentsWithLock
.calledWith(this.project_id, this.docs[0].doc, this.docs[0].path)
.should.equal(true)
})
it('should call the callback', function () {
this.callback.called.should.equal(true)
})
})
it('should queue doc content resyncs', function () {
this.DocumentManager.resyncDocContentsWithLock
.calledWith(this.project_id, this.docs[0].doc, this.docs[0].path)
.should.equal(true)
})
describe('resyncProjectStructureOnly=true', function () {
beforeEach(function () {
this.HistoryManager.resyncProjectHistory(
this.project_id,
this.projectHistoryId,
this.docs,
this.files,
{ resyncProjectStructureOnly: true },
this.callback
)
})
it('should call the callback', function () {
this.callback.called.should.equal(true)
it('should queue a project structure resync', function () {
this.ProjectHistoryRedisManager.queueResyncProjectStructure
.calledWith(
this.project_id,
this.projectHistoryId,
this.docs,
this.files,
{ resyncProjectStructureOnly: true }
)
.should.equal(true)
})
it('should not queue doc content resyncs', function () {
this.DocumentManager.resyncDocContentsWithLock.called.should.equal(
false
)
})
it('should call the callback', function () {
this.callback.called.should.equal(true)
})
})
})
})

View file

@@ -8,3 +8,4 @@ export class InconsistentChunkError extends OError {}
export class UpdateWithUnknownFormatError extends OError {}
export class UnexpectedOpTypeError extends OError {}
export class TooManyRequestsError extends OError {}
export class NeedFullProjectStructureResyncError extends OError {}

View file

@@ -136,6 +136,9 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
moreBatches = true
break
}
if (update.resyncProjectStructureOnly) {
update._raw = rawUpdate
}
rawUpdates.push(rawUpdate)
updates.push(update)
@@ -151,6 +154,26 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
}
}
/**
* @param {string} projectId
* @param {ResyncProjectStructureUpdate} update
* @return {Promise<void>}
*/
async function deleteAppliedDocUpdate(projectId, update) {
const raw = update._raw
// Delete the first occurrence of the update with LREM KEY COUNT
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
// value moving from head to tail.'
//
// If COUNT is 0 the entire list would be searched which would block
// redis since it would be an O(N) operation where N is the length of
// the queue, in a multi of the batch size.
metrics.summary('redis.projectHistoryOps', raw.length, {
status: 'lrem',
})
await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
}
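
The COUNT=1 semantics matter here: a positive COUNT makes LREM scan head to tail and stop after removing that many matches, so only the single applied update is deleted rather than every equal entry in the queue. A self-contained demonstration, assuming an ioredis client like the rclient used above:

const Redis = require('ioredis')

async function demoLrem() {
  const rclient = new Redis() // assumes a local redis on 6379
  await rclient.rpush('demo:queue', 'a', 'b', 'a')
  // COUNT=1: remove only the first matching element, head to tail.
  await rclient.lrem('demo:queue', 1, 'a')
  console.log(await rclient.lrange('demo:queue', 0, -1)) // [ 'b', 'a' ]
  await rclient.quit()
}

demoLrem()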
async function deleteAppliedDocUpdates(projectId, updates) {
const multi = rclient.multi()
// Delete all the updates which have been applied (exact match)
@@ -160,7 +183,7 @@ async function deleteAppliedDocUpdates(projectId, updates) {
// value moving from head to tail.'
//
// If COUNT is 0 the entire list would be searched which would block
// redis snce it would be an O(N) operation where N is the length of
// redis since it would be an O(N) operation where N is the length of
// the queue, in a multi of the batch size.
metrics.summary('redis.projectHistoryOps', update.length, {
status: 'lrem',
@@ -383,6 +406,7 @@ export const promises = {
countUnprocessedUpdates,
getRawUpdatesBatch,
deleteAppliedDocUpdates,
deleteAppliedDocUpdate,
destroyDocUpdatesQueue,
getUpdatesInBatches,
getProjectIdsWithHistoryOps,

View file

@@ -8,7 +8,7 @@ import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import { File, Range } from 'overleaf-editor-core'
import { SyncError } from './Errors.js'
import { NeedFullProjectStructureResyncError, SyncError } from './Errors.js'
import { db, ObjectId } from './mongodb.js'
import * as SnapshotManager from './SnapshotManager.js'
import * as LockManager from './LockManager.js'
@@ -100,6 +100,9 @@ async function _startResyncWithoutLock(projectId, options) {
if (options.historyRangesMigration) {
webOpts.historyRangesMigration = options.historyRangesMigration
}
if (options.resyncProjectStructureOnly) {
webOpts.resyncProjectStructureOnly = options.resyncProjectStructureOnly
}
await WebApiManager.promises.requestResync(projectId, webOpts)
await setResyncState(projectId, syncState)
}
@@ -281,8 +284,10 @@ class SyncState {
})
}
for (const doc of update.resyncProjectStructure.docs) {
this.startDocContentSync(doc.path)
if (!update.resyncProjectStructureOnly) {
for (const doc of update.resyncProjectStructure.docs) {
this.startDocContentSync(doc.path)
}
}
this.stopProjectStructureSync()
@@ -475,6 +480,28 @@ class SyncUpdateExpander {
persistedBinaryFiles
)
this.queueSetMetadataOpsForLinkedFiles(update)
if (update.resyncProjectStructureOnly) {
const docPaths = new Set()
for (const entity of update.resyncProjectStructure.docs) {
const path = UpdateTranslator._convertPathname(entity.path)
docPaths.add(path)
}
for (const expandedUpdate of this.expandedUpdates) {
if (docPaths.has(expandedUpdate.pathname)) {
// Clear the resync state and queue entry, we need to start over.
this.expandedUpdates = []
await clearResyncState(this.projectId)
await RedisManager.promises.deleteAppliedDocUpdate(
this.projectId,
update
)
throw new NeedFullProjectStructureResyncError(
'aborting partial resync: touched doc'
)
}
}
}
} else if ('resyncDocContent' in update) {
logger.debug(
{ projectId: this.projectId, update },
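
When a partial resync is rejected like this, the queue entry has already been removed and the resync state cleared, so a caller can safely retry with a full resync. A hedged sketch of such a fallback; both helper functions are hypothetical, only the error class comes from this commit:

const { NeedFullProjectStructureResyncError } = require('./Errors.js')

async function resyncWithFallback(projectId) {
  try {
    await requestStructureOnlyResync(projectId) // hypothetical helper
  } catch (err) {
    if (err instanceof NeedFullProjectStructureResyncError) {
      // A doc was touched; start over with a full resync.
      await requestFullResync(projectId) // hypothetical helper
    } else {
      throw err
    }
  }
}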

View file

@@ -39,6 +39,9 @@ async function requestResync(projectId, opts = {}) {
if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration
}
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
await fetchNothing(
`${Settings.apis.web.url}/project/${projectId}/history/resync`,
{

View file

@@ -94,6 +94,9 @@ export type ResyncProjectStructureUpdate = {
meta: {
ts: string
}
// optional fields for resyncProjectStructureOnly=true
resyncProjectStructureOnly?: boolean
_raw: string
}
export type ResyncDocContentUpdate = {

View file

@@ -9,6 +9,7 @@ import Settings from '@overleaf/settings'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
import sinon from 'sinon'
import { getFailure } from './helpers/ProjectHistoryClient.js'
const { ObjectId } = mongodb
const EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
@@ -1315,6 +1316,228 @@ describe('Syncing with web and doc-updater', function () {
)
})
})
describe('resyncProjectStructureOnly', function () {
it('should handle structure only updates', function (done) {
const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'main.tex': {
hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
stringLength: 3,
},
},
},
changes: [],
},
startVersion: 0,
},
})
const docContentRequest = MockHistoryStore()
.get(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(200, 'a\nb')
MockHistoryStore()
.head(`/api/projects/${historyId}/blobs/${fileHash}`)
.reply(200)
const addFile = MockHistoryStore()
.post(`/api/projects/${historyId}/legacy_changes`, body => {
expect(body).to.deep.equal([
{
v2Authors: [],
authors: [],
timestamp: this.timestamp.toJSON(),
operations: [
{
pathname: 'test.png',
file: {
hash: fileHash,
},
},
],
origin: { kind: 'test-origin' },
},
])
return true
})
.query({ end_version: 0 })
.reply(204)
// allow a 2nd resync
MockWeb()
.post(`/project/${this.project_id}/history/resync`)
.reply(204)
async.series(
[
cb => {
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
cb => {
const update = {
projectHistoryId: historyId,
resyncProjectStructureOnly: true,
resyncProjectStructure: {
docs: [{ path: '/main.tex' }],
files: [
{
file: this.file_id,
path: '/test.png',
_hash: fileHash,
createdBlob: true,
},
],
},
meta: {
ts: this.timestamp,
},
}
ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
},
cb => {
ProjectHistoryClient.flushProject(this.project_id, cb)
},
cb => {
// fails when previous resync did not finish
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
],
error => {
if (error) {
throw error
}
assert(
addFile.isDone(),
`/api/projects/${historyId}/legacy_changes should have been called`
)
assert(
!docContentRequest.isDone(),
'should not have requested doc content'
)
done()
}
)
})
it('should reject partial resync on docs', function (done) {
const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'main.tex': {
hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
stringLength: 3,
},
},
},
changes: [],
},
startVersion: 0,
},
})
const docContentRequest = MockHistoryStore()
.get(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(200, 'a\nb')
MockHistoryStore()
.head(`/api/projects/${historyId}/blobs/${fileHash}`)
.reply(200)
const addFile = MockHistoryStore()
.post(`/api/projects/${historyId}/legacy_changes`)
.query({ end_version: 0 })
.reply(204)
// allow a 2nd resync
MockWeb()
.post(`/project/${this.project_id}/history/resync`)
.reply(204)
async.series(
[
cb => {
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
cb => {
const update = {
projectHistoryId: historyId,
resyncProjectStructureOnly: true,
resyncProjectStructure: {
docs: [{ path: '/main-renamed.tex' }],
files: [
{
file: this.file_id,
path: '/test.png',
_hash: fileHash,
createdBlob: true,
},
],
},
meta: {
ts: this.timestamp,
},
}
ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
},
cb => {
ProjectHistoryClient.flushProject(
this.project_id,
{ allowErrors: true },
(err, res) => {
if (err) return cb(err)
expect(res.statusCode).to.equal(500)
expect(loggerError).to.have.been.calledWith(
sinon.match({
err: {
name: 'NeedFullProjectStructureResyncError',
message: 'aborting partial resync: touched doc',
},
})
)
getFailure(this.project_id, (err, failure) => {
if (err) return cb(err)
expect(failure).to.include({
error:
'NeedFullProjectStructureResyncError: aborting partial resync: touched doc',
})
cb()
})
}
)
},
cb => {
// fails when previous resync did not finish
ProjectHistoryClient.resyncHistory(this.project_id, cb)
},
],
error => {
if (error) {
throw error
}
assert(!addFile.isDone(), 'should not have persisted changes')
assert(
!docContentRequest.isDone(),
'should not have requested doc content'
)
done()
}
)
})
})
})
})
})

View file

@@ -330,6 +330,10 @@ export function setFailure(failureEntry, callback) {
)
}
export function getFailure(projectId, callback) {
db.projectHistoryFailures.findOne({ project_id: projectId }, callback)
}
export function transferLabelOwnership(fromUser, toUser, callback) {
request.post(
{

View file

@@ -411,6 +411,39 @@ describe('SyncManager', function () {
})
})
it('records docs to resync when resyncProjectStructureOnly is not set', async function () {
const updates = [this.projectStructureSyncUpdate]
const { updates: filteredUpdates, syncState } =
await this.SyncManager.promises.skipUpdatesDuringSync(
this.projectId,
updates
)
expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
expect(syncState.toRaw()).to.deep.equal({
resyncProjectStructure: false,
resyncDocContents: ['new.tex'],
origin: { kind: 'history-resync' },
})
})
it('records no docs to resync with resyncProjectStructureOnly=true', async function () {
this.projectStructureSyncUpdate.resyncProjectStructureOnly = true
const updates = [this.projectStructureSyncUpdate]
const { updates: filteredUpdates, syncState } =
await this.SyncManager.promises.skipUpdatesDuringSync(
this.projectId,
updates
)
expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
expect(syncState.toRaw()).to.deep.equal({
resyncProjectStructure: false,
resyncDocContents: [],
origin: { kind: 'history-resync' },
})
})
it('allow project structure updates after project structure sync update', async function () {
const updates = [this.projectStructureSyncUpdate, this.renameUpdate]
const { updates: filteredUpdates, syncState } =

View file

@@ -302,6 +302,9 @@ function resyncProjectHistory(
if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration
}
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
_makeRequest(
{
path: `/project/${projectId}/history/resync`,

View file

@@ -148,6 +148,9 @@ module.exports = HistoryController = {
if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration
}
if (req.body.resyncProjectStructureOnly) {
opts.resyncProjectStructureOnly = req.body.resyncProjectStructureOnly
}
ProjectEntityUpdateHandler.resyncProjectHistory(
projectId,
opts,
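
End to end, the feature stays opt-in: clients trigger it through the existing resync endpoint. A sketch of such a request using Node's global fetch; the URL shape is taken from the WebApiManager hunk above, while auth headers and the web base URL are assumed:

const response = await fetch(`${webUrl}/project/${projectId}/history/resync`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ resyncProjectStructureOnly: true }),
})
if (!response.ok) throw new Error(`resync failed: ${response.status}`)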