Merge pull request #22421 from overleaf/ar-pass-created-blob-for-filetree-uploads

[web] Pass createdBlob through for file creation everywhere

GitOrigin-RevId: 880bebd0a55e351a6a61555a46e80faef22db7cb
This commit is contained in:
Andrew Rumble 2024-12-11 10:26:49 +00:00 committed by Copybot
parent f1c3ddb7c2
commit e6a2caa088
5 changed files with 227 additions and 101 deletions

View file

@ -246,6 +246,23 @@ function createBlobFromString(historyId, data, fileId, callback) {
)
}
function _checkBlobExists(historyId, hash, callback) {
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'HEAD',
...getHistoryFetchOptions(),
})
.then(res => {
callback(null, true)
})
.catch(err => {
if (err instanceof RequestFailedError) {
return callback(null, false)
}
callback(OError.tag(err), false)
})
}
export function createBlobForUpdate(projectId, historyId, update, callback) {
callback = _.once(callback)
@ -304,49 +321,30 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
const fileId = urlMatch[2]
const filestoreURL = `${Settings.apis.filestore.url}/project/${projectId}/file/${fileId}`
if (update.createdBlob) {
logger.debug(
{ projectId, fileId },
'Skipping blob creation as it has already been created'
)
return callback(null, { file: update.hash })
}
fetchStream(filestoreURL, {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
})
.then(stream => {
LocalFileWriter.bufferOnDisk(
stream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
if (update.hash && update.hash !== fileHash) {
logger.warn(
{ projectId, fileId, webHash: update.hash, fileHash },
'hash mismatch between web and project-history'
)
}
logger.debug({ fileHash }, 'created blob for file')
callback(null, { file: fileHash })
}
_checkBlobExists(historyId, update.hash, (err, blobExists) => {
if (err) {
logger.warn(
{ err, projectId, fileId, update },
'error checking whether blob exists, reading from filestore'
)
} else if (update.createdBlob && blobExists) {
logger.debug(
{ projectId, fileId, update },
'Skipping blob creation as it has already been created'
)
return callback(null, { file: update.hash })
} else if (update.createdBlob) {
logger.warn(
{ projectId, fileId, update },
'created blob does not exist, reading from filestore'
)
}
fetchStream(filestoreURL, {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
})
.catch(err => {
if (err instanceof RequestFailedError && err.response.status === 404) {
logger.warn(
{ projectId, historyId, filestoreURL },
'File contents not found in filestore. Storing in history as an empty file'
)
const emptyStream = new StringStream()
.then(stream => {
LocalFileWriter.bufferOnDisk(
emptyStream,
stream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
@ -356,15 +354,48 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
if (err) {
return callback(err)
}
logger.debug({ fileHash }, 'created empty blob for file')
if (update.hash && update.hash !== fileHash) {
logger.warn(
{ projectId, fileId, webHash: update.hash, fileHash },
'hash mismatch between web and project-history'
)
}
logger.debug({ fileHash }, 'created blob for file')
callback(null, { file: fileHash })
}
)
emptyStream.push(null) // send an EOF signal
} else {
callback(OError.tag(err, 'error from filestore', { filestoreURL }))
}
})
})
.catch(err => {
if (
err instanceof RequestFailedError &&
err.response.status === 404
) {
logger.warn(
{ projectId, historyId, filestoreURL },
'File contents not found in filestore. Storing in history as an empty file'
)
const emptyStream = new StringStream()
LocalFileWriter.bufferOnDisk(
emptyStream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
logger.debug({ fileHash }, 'created empty blob for file')
callback(null, { file: fileHash })
}
)
emptyStream.push(null) // send an EOF signal
} else {
callback(OError.tag(err, 'error from filestore', { filestoreURL }))
}
})
})
} else {
const error = new OError('invalid update for blob creation')
callback(error)

View file

@ -492,7 +492,7 @@ describe('HistoryStoreManager', function () {
})
})
describe('when the createdBlob flag is set on the update', function () {
beforeEach(function (done) {
beforeEach(function () {
this.file_id = '012345678901234567890123'
this.update = {
file: true,
@ -500,33 +500,85 @@ describe('HistoryStoreManager', function () {
url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
hash: this.hash,
}
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
})
describe('when history-v1 confirms that the blob exists', function () {
beforeEach(function (done) {
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
this.actualHash = hash
done()
}
)
})
)
})
it('should call the callback with the existing hash', function () {
expect(this.actualHash).to.equal(this.hash)
})
it('should call the callback with the existing hash', function () {
expect(this.actualHash).to.equal(this.hash)
})
it('should not request the file from the filestore', function () {
expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
it('should not request the file from the filestore', function () {
expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
it('should log a debug level message', function () {
expect(this.logger.debug).to.have.been.calledWith(
{ projectId: this.projectId, fileId: this.file_id },
'Skipping blob creation as it has already been created'
)
it('should log a debug level message', function () {
expect(this.logger.debug).to.have.been.calledWith(
{
projectId: this.projectId,
fileId: this.file_id,
update: this.update,
},
'Skipping blob creation as it has already been created'
)
})
})
describe('when history-v1 does not confirm that the blob exists', function () {
beforeEach(function (done) {
this.FetchUtils.fetchNothing.rejects(
new RequestFailedError(
`${this.settings.overleaf.history.host}/project/${this.projectId}/file/${this.file_id}`,
{ method: 'HEAD' },
{ status: 404 }
)
)
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
)
})
it('should warn that we will use the filestore', function () {
expect(this.logger.warn).to.have.been.calledWithMatch(
{
fileId: this.file_id,
projectId: this.projectId,
update: this.update,
},
'created blob does not exist, reading from filestore'
)
})
it('should request the file from the filestore in settings', function () {
expect(this.FetchUtils.fetchStream).to.have.been.calledWithMatch(
`${this.settings.apis.filestore.url}/project/${this.projectId}/file/${this.file_id}`
)
})
it('should call the callback with the blob', function () {
expect(this.actualHash).to.equal(this.hash)
})
})
})
})

View file

@ -302,7 +302,7 @@ const FileStoreHandler = {
module.exports = FileStoreHandler
module.exports.promises = promisifyAll(FileStoreHandler, {
multiResult: {
uploadFileFromDisk: ['url', 'fileRef'],
uploadFileFromDisk: ['url', 'fileRef', 'createdBlob'],
uploadFileFromDiskWithHistoryId: ['url', 'fileRef', 'createdBlob'],
},
})

View file

@ -332,13 +332,14 @@ const addFile = wrapWithLock({
if (!SafePath.isCleanFilename(fileName)) {
throw new Errors.InvalidNameError('invalid element name')
}
const { url, fileRef } = await ProjectEntityUpdateHandler._uploadFile(
projectId,
folderId,
fileName,
fsPath,
linkedFileData
)
const { url, fileRef, createdBlob } =
await ProjectEntityUpdateHandler._uploadFile(
projectId,
folderId,
fileName,
fsPath,
linkedFileData
)
return {
projectId,
@ -346,6 +347,7 @@ const addFile = wrapWithLock({
userId,
fileRef,
fileStoreUrl: url,
createdBlob,
source,
}
},
@ -355,6 +357,7 @@ const addFile = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
}) {
const { result, project } =
@ -366,6 +369,7 @@ const addFile = wrapWithLock({
const projectHistoryId = project.overleaf?.history?.id
const newFiles = [
{
createdBlob,
file: fileRef,
path: result && result.path && result.path.fileSystem,
url: fileStoreUrl,
@ -384,7 +388,7 @@ const addFile = wrapWithLock({
.catch(error => {
logger.error({ error }, 'failed to mark project as updated')
})
return { fileRef, folderId }
return { fileRef, folderId, createdBlob }
},
})
@ -529,11 +533,12 @@ const upsertFile = wrapWithLock({
name: fileName,
linkedFileData,
}
const { url, fileRef } = await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
fileArgs,
fsPath
)
const { url, fileRef, createdBlob } =
await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
fileArgs,
fsPath
)
return {
projectId,
@ -544,6 +549,7 @@ const upsertFile = wrapWithLock({
userId,
fileRef,
fileStoreUrl: url,
createdBlob,
source,
}
},
@ -554,6 +560,7 @@ const upsertFile = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
}) {
let element
@ -613,6 +620,7 @@ const upsertFile = wrapWithLock({
newFiles: [
{
createdBlob,
file: fileRef,
path: path.fileSystem,
url: fileStoreUrl,
@ -637,7 +645,8 @@ const upsertFile = wrapWithLock({
fileRef,
fileStoreUrl,
folderId,
source
source,
createdBlob
)
return { fileRef, isNew: false, oldFileRef: existingFile }
@ -649,6 +658,7 @@ const upsertFile = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
})
@ -706,12 +716,15 @@ const upsertFileWithPath = wrapWithLock({
name: fileName,
linkedFileData,
}
const { url: fileStoreUrl, fileRef } =
await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
fileArgs,
fsPath
)
const {
url: fileStoreUrl,
fileRef,
createdBlob,
} = await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
fileArgs,
fsPath
)
return {
projectId,
@ -722,6 +735,7 @@ const upsertFileWithPath = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
}
},
@ -734,6 +748,7 @@ const upsertFileWithPath = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
}) {
const { newFolders, folder } =
@ -755,6 +770,7 @@ const upsertFileWithPath = wrapWithLock({
userId,
fileRef,
fileStoreUrl,
createdBlob,
source,
})
@ -1042,12 +1058,15 @@ const convertDocToFile = wrapWithLock({
}
await DocumentUpdaterHandler.promises.deleteDoc(projectId, docId, false)
const fsPath = await FileWriter.promises.writeLinesToDisk(projectId, lines)
const { url: fileStoreUrl, fileRef } =
await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
{ name: doc.name, rev: rev + 1 },
fsPath
)
const {
url: fileStoreUrl,
fileRef,
createdBlob,
} = await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
{ name: doc.name, rev: rev + 1 },
fsPath
)
try {
await fs.promises.unlink(fsPath)
} catch (err) {
@ -1061,6 +1080,7 @@ const convertDocToFile = wrapWithLock({
fileStoreUrl,
userId,
source,
createdBlob,
}
},
async withLock({
@ -1071,6 +1091,7 @@ const convertDocToFile = wrapWithLock({
fileStoreUrl,
userId,
source,
createdBlob,
}) {
const project =
await ProjectEntityMongoUpdateHandler.promises.replaceDocWithFile(
@ -1085,7 +1106,7 @@ const convertDocToFile = wrapWithLock({
userId,
{
oldDocs: [{ doc, path }],
newFiles: [{ file: fileRef, path, url: fileStoreUrl }],
newFiles: [{ file: fileRef, path, url: fileStoreUrl, createdBlob }],
newProject: project,
},
source
@ -1149,7 +1170,11 @@ const ProjectEntityUpdateHandler = {
'folderId',
]),
addFile: callbackifyMultiResult(addFile, ['fileRef', 'folderId']),
addFile: callbackifyMultiResult(addFile, [
'fileRef',
'folderId',
'createdBlob',
]),
addFolder: callbackifyMultiResult(addFolder, ['folder', 'parentFolderId']),
@ -1324,7 +1349,8 @@ const ProjectEntityUpdateHandler = {
newFileRef,
fileStoreUrl,
folderId,
source
source,
createdBlob
) {
const {
oldFileRef,
@ -1347,6 +1373,7 @@ const ProjectEntityUpdateHandler = {
const newFiles = [
{
file: updatedFileRef,
createdBlob,
path: path.fileSystem,
url: fileStoreUrl,
},

View file

@ -690,6 +690,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.fileUrl,
fileRef: this.newFile,
createdBlob: true,
})
this.TpdsUpdateSender.promises.addFile.resolves()
this.ProjectEntityMongoUpdateHandler.promises.addFile.resolves({
@ -759,6 +760,7 @@ describe('ProjectEntityUpdateHandler', function () {
file: this.newFile,
path: this.path,
url: this.fileUrl,
createdBlob: true,
},
]
this.DocumentUpdaterHandler.promises.updateProjectStructure.should.have.been.calledWith(
@ -1098,6 +1100,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.fileUrl,
fileRef: this.file,
createdBlob: true,
})
})
@ -1214,6 +1217,7 @@ describe('ProjectEntityUpdateHandler', function () {
file: this.newFile,
path: this.fileSystemPath,
url: this.fileUrl,
createdBlob: true,
},
]
this.DocumentUpdaterHandler.promises.updateProjectStructure.should.have.been.calledWith(
@ -1247,6 +1251,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.fileUrl,
fileRef: this.newFile,
createdBlob: true,
})
this.ProjectEntityUpdateHandler.promises.addFile = {
mainTask: sinon.stub().resolves(this.newFile),
@ -1285,6 +1290,7 @@ describe('ProjectEntityUpdateHandler', function () {
fileRef: this.newFile,
fileStoreUrl: this.fileUrl,
source: this.source,
createdBlob: true,
})
})
@ -1367,6 +1373,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.newFileUrl,
fileRef: this.newFile,
createdBlob: true,
})
this.ProjectEntityMongoUpdateHandler.promises.replaceDocWithFile.resolves(
this.newProject
@ -1402,6 +1409,7 @@ describe('ProjectEntityUpdateHandler', function () {
file: this.newFile,
path: this.path,
url: this.newFileUrl,
createdBlob: true,
},
]
const updates = {
@ -1583,6 +1591,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.fileUrl,
fileRef: this.newFile,
createdBlob: true,
})
this.ProjectEntityUpdateHandler.promises.mkdirp = {
withoutLock: sinon
@ -1627,6 +1636,7 @@ describe('ProjectEntityUpdateHandler', function () {
fileRef: this.newFile,
fileStoreUrl: this.fileUrl,
source: this.source,
createdBlob: true,
}
)
})
@ -2907,6 +2917,7 @@ describe('ProjectEntityUpdateHandler', function () {
this.FileStoreHandler.promises.uploadFileFromDisk.resolves({
url: this.fileStoreUrl,
fileRef: this.file,
createdBlob: true,
})
this.ProjectEntityMongoUpdateHandler.promises.replaceDocWithFile.resolves(
this.project
@ -2962,7 +2973,12 @@ describe('ProjectEntityUpdateHandler', function () {
{
oldDocs: [{ doc: this.doc, path: this.path }],
newFiles: [
{ file: this.file, path: this.path, url: this.fileStoreUrl },
{
file: this.file,
path: this.path,
url: this.fileStoreUrl,
createdBlob: true,
},
],
newProject: this.project,
},