Merge pull request #22202 from overleaf/jpa-tpds

[misc] tpds: read files from history-v1 with fallback to filestore

GitOrigin-RevId: afdde47e453b0cb07985392b5e2b03abf348debf
This commit is contained in:
Jakob Ackermann 2024-11-28 12:48:52 +01:00 committed by Copybot
parent cc60acd4ea
commit 892eaabdf7
6 changed files with 99 additions and 18 deletions

View file

@ -596,7 +596,9 @@ const upsertFile = wrapWithLock({
const projectHistoryId = project.overleaf?.history?.id
await TpdsUpdateSender.promises.addFile({
projectId: project._id,
historyId: projectHistoryId,
fileId: fileRef._id,
hash: fileRef.hash,
path: path.fileSystem,
rev: fileRef.rev,
projectName: project.name,
@ -1298,9 +1300,15 @@ const ProjectEntityUpdateHandler = {
})
}
const historyId = project?.overleaf?.history?.id
if (!historyId) {
throw new OError('project does not have a history id', { projectId })
}
await TpdsUpdateSender.promises.addFile({
projectId,
historyId,
fileId: fileRef._id,
hash: fileRef.hash,
path: result?.path?.fileSystem,
projectName: project.name,
rev: fileRef.rev,
@ -1346,7 +1354,9 @@ const ProjectEntityUpdateHandler = {
const projectHistoryId = project.overleaf?.history?.id
await TpdsUpdateSender.promises.addFile({
projectId: project._id,
historyId: projectHistoryId,
fileId: updatedFileRef._id,
hash: updatedFileRef.hash,
path: path.fileSystem,
rev: updatedFileRef.rev,
projectName: project.name,

View file

@ -5,6 +5,7 @@ const ProjectGetter = require('../Project/ProjectGetter')
const ProjectEntityHandler = require('../Project/ProjectEntityHandler')
const { Project } = require('../../models/Project')
const TpdsUpdateSender = require('./TpdsUpdateSender')
const OError = require('@overleaf/o-error')
module.exports = {
flushProjectToTpds: callbackify(flushProjectToTpds),
@ -24,6 +25,7 @@ async function flushProjectToTpds(projectId) {
const project = await ProjectGetter.promises.getProject(projectId, {
name: true,
deferredTpdsFlushCounter: true,
'overleaf.history.id': 1,
})
await _flushProjectToTpds(project)
}
@ -37,6 +39,7 @@ async function flushProjectToTpdsIfNeeded(projectId) {
const project = await ProjectGetter.promises.getProject(projectId, {
name: true,
deferredTpdsFlushCounter: true,
'overleaf.history.id': 1,
})
if (project.deferredTpdsFlushCounter > 0) {
await _flushProjectToTpds(project)
@ -44,6 +47,11 @@ async function flushProjectToTpdsIfNeeded(projectId) {
}
async function _flushProjectToTpds(project) {
const historyId = project?.overleaf?.history?.id
if (!historyId) {
const projectId = project._id
throw new OError('project does not have a history id', { projectId })
}
logger.debug({ projectId: project._id }, 'flushing project to TPDS')
logger.debug({ projectId: project._id }, 'finished flushing project to TPDS')
await DocumentUpdaterHandler.promises.flushProjectToMongo(project._id)
@ -64,7 +72,9 @@ async function _flushProjectToTpds(project) {
for (const [filePath, file] of Object.entries(files)) {
await TpdsUpdateSender.promises.addFile({
projectId: project._id,
historyId,
fileId: file._id,
hash: file.hash,
path: filePath,
projectName: project.name,
rev: file.rev,

View file

@ -41,6 +41,7 @@ async function addEntity(params) {
rev,
folderId,
streamOrigin,
streamFallback,
entityId,
entityType,
} = params
@ -60,6 +61,7 @@ async function addEntity(params) {
uri: buildTpdsUrl(userId, projectName, path),
title: 'addFile',
streamOrigin,
streamFallback,
}
await enqueue(userId, 'pipeStreamFrom', job)
@ -68,8 +70,21 @@ async function addEntity(params) {
async function addFile(params) {
metrics.inc('tpds.add-file')
const { projectId, fileId, path, projectName, rev, folderId } = params
const {
projectId,
historyId,
fileId,
hash,
path,
projectName,
rev,
folderId,
} = params
// Go through project-history to avoid the need for handling history-v1 authentication.
const streamOrigin =
settings.apis.project_history.url +
Path.join(`/project/${historyId}/blob/${hash}`)
const streamFallback =
settings.apis.filestore.url +
Path.join(`/project/${projectId}`, `/file/${fileId}`)
@ -80,6 +95,7 @@ async function addFile(params) {
rev,
folderId,
streamOrigin,
streamFallback,
entityId: fileId,
entityType: 'file',
})

View file

@ -682,6 +682,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.newFile = {
_id: fileId,
hash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
rev: 0,
name: this.fileName,
linkedFileData: this.linkedFileData,
@ -734,8 +735,10 @@ describe('ProjectEntityUpdateHandler', function () {
this.TpdsUpdateSender.promises.addFile
.calledWith({
projectId,
historyId: this.project.overleaf.history.id,
projectName: this.project.name,
fileId,
hash: this.newFile.hash,
rev: 0,
path: this.path,
folderId,
@ -777,6 +780,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.newFile = {
_id: fileId,
hash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
rev: 0,
name: this.fileName,
linkedFileData: this.linkedFileData,
@ -835,7 +839,11 @@ describe('ProjectEntityUpdateHandler', function () {
describe('updating an existing doc', function () {
beforeEach(function (done) {
this.existingDoc = { _id: docId, name: this.docName }
this.existingFile = { _id: fileId, name: this.fileName }
this.existingFile = {
_id: fileId,
name: this.fileName,
hash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
}
this.folder = {
_id: folderId,
docs: [this.existingDoc],
@ -1120,7 +1128,12 @@ describe('ProjectEntityUpdateHandler', function () {
describe('updating an existing file', function () {
beforeEach(function (done) {
this.existingFile = { _id: fileId, name: this.fileName, rev: 1 }
this.newFile = { _id: new ObjectId(), name: this.fileName, rev: 3 }
this.newFile = {
_id: new ObjectId(),
name: this.fileName,
rev: 3,
hash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
}
this.folder = { _id: folderId, fileRefs: [this.existingFile], docs: [] }
this.ProjectLocator.promises.findElement.resolves({
element: this.folder,
@ -1172,8 +1185,10 @@ describe('ProjectEntityUpdateHandler', function () {
it('notifies the tpds', function () {
this.TpdsUpdateSender.promises.addFile.should.have.been.calledWith({
projectId,
historyId: this.project.overleaf.history.id,
projectName: this.project.name,
fileId: this.newFile._id,
hash: this.newFile.hash,
rev: this.newFile.rev,
path: this.fileSystemPath,
folderId,
@ -1222,7 +1237,10 @@ describe('ProjectEntityUpdateHandler', function () {
describe('creating a new file', function () {
beforeEach(function (done) {
this.folder = { _id: folderId, fileRefs: [], docs: [] }
this.newFile = { _id: fileId }
this.newFile = {
_id: fileId,
hash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
}
this.ProjectLocator.promises.findElement.resolves({
element: this.folder,
})

View file

@ -9,7 +9,7 @@ const MODULE_PATH =
describe('TpdsProjectFlusher', function () {
beforeEach(function () {
this.project = { _id: new ObjectId() }
this.project = { _id: new ObjectId(), overleaf: { history: { id: 42 } } }
this.folder = { _id: new ObjectId() }
this.docs = {
'/doc/one': {
@ -26,8 +26,18 @@ describe('TpdsProjectFlusher', function () {
},
}
this.files = {
'/file/one': { _id: 'mock-file-1', rev: 7, folder: this.folder },
'/file/two': { _id: 'mock-file-2', rev: 8, folder: this.folder },
'/file/one': {
_id: 'mock-file-1',
rev: 7,
folder: this.folder,
hash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
},
'/file/two': {
_id: 'mock-file-2',
rev: 8,
folder: this.folder,
hash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
},
}
this.DocumentUpdaterHandler = {
promises: {
@ -107,7 +117,9 @@ describe('TpdsProjectFlusher', function () {
this.TpdsUpdateSender.promises.addFile
).to.have.been.calledWith({
projectId: this.project._id,
historyId: this.project.overleaf.history.id,
fileId: file._id,
hash: file.hash,
projectName: this.project.name,
rev: file.rev,
path,
@ -235,7 +247,9 @@ describe('TpdsProjectFlusher', function () {
this.TpdsUpdateSender.promises.addFile
).to.have.been.calledWith({
projectId: this.project._id,
historyId: this.project.overleaf.history.id,
fileId: file._id,
hash: file.hash,
projectName: this.project.name,
rev: file.rev,
path,

View file

@ -18,6 +18,7 @@ const projectName = 'project_name_here'
const thirdPartyDataStoreApiUrl = 'http://third-party-json-store.herokuapp.com'
const siteUrl = 'http://127.0.0.1:3000'
const filestoreUrl = 'filestore.overleaf.com'
const projectHistoryUrl = 'http://project-history:3054'
describe('TpdsUpdateSender', function () {
beforeEach(function () {
@ -52,6 +53,9 @@ describe('TpdsUpdateSender', function () {
docstore: {
pubUrl: this.docstoreUrl,
},
project_history: {
url: projectHistoryUrl,
},
},
}
const getUsers = sinon.stub()
@ -113,11 +117,15 @@ describe('TpdsUpdateSender', function () {
it('queues a post the file with user and file id', async function () {
const fileId = '4545345'
const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const historyId = 91525
const path = '/some/path/here.jpg'
await this.TpdsUpdateSender.promises.addFile({
projectId,
historyId,
fileId,
hash,
path,
projectName,
})
@ -130,7 +138,8 @@ describe('TpdsUpdateSender', function () {
method: 'pipeStreamFrom',
job: {
method: 'post',
streamOrigin: `${filestoreUrl}/project/${projectId}/file/${fileId}`,
streamOrigin: `${projectHistoryUrl}/project/${historyId}/blob/${hash}`,
streamFallback: `${filestoreUrl}/project/${projectId}/file/${fileId}`,
uri: `${thirdPartyDataStoreApiUrl}/user/${userId}/entity/${encodeURIComponent(
projectName
)}${encodeURIComponent(path)}`,
@ -412,18 +421,22 @@ describe('TpdsUpdateSender', function () {
})
.resolves([])
})
})
it('does not make request to tpds', async function () {
const fileId = '4545345'
const path = '/some/path/here.jpg'
it('does not make request to tpds', async function () {
const fileId = '4545345'
const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const historyId = 91525
const path = '/some/path/here.jpg'
await this.TpdsUpdateSender.promises.addFile({
projectId,
fileId,
path,
projectName,
await this.TpdsUpdateSender.promises.addFile({
projectId,
historyId,
hash,
fileId,
path,
projectName,
})
this.FetchUtils.fetchNothing.should.not.have.been.called
})
this.FetchUtils.fetchNothing.should.not.have.been.called
})
})