Merge pull request #22096 from overleaf/bg-issue21795

Extend file upload to create history blobs

GitOrigin-RevId: b3efac7f58d4c027ca9131867e761837ce63facb
Brian Gough 2024-11-25 15:01:31 +00:00 committed by Copybot
parent ef72402df6
commit fe7ce48e48
6 changed files with 232 additions and 68 deletions
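
In outline, the change routes every upload through the project history: the handler looks up the project's history id, hashes the file on disk, stores the content as a history blob keyed by that hash, and only then uploads to the filestore with the hash attached. The code below is a condensed sketch of that flow, simplified from the FileStoreHandler diff that follows (the Async.retry wrappers, symlink checks and logging are elided):

    function uploadFileFromDisk(projectId, fileArgs, fsPath, callback) {
      // 1. Look up the history id for the project
      ProjectDetailsHandler.getDetails(projectId, function (err, project) {
        if (err) return callback(err)
        const historyId = project.overleaf?.history?.id
        if (!historyId) return callback(new OError('missing history id'))
        // 2. Stat the file (for its byte length) and compute its content hash
        fs.lstat(fsPath, function (err, stat) {
          if (err) return callback(err)
          FileHashManager.computeHash(fsPath, function (err, hash) {
            if (err) return callback(err)
            // 3. Store the content in the history store as a blob keyed by the hash
            HistoryManager.uploadBlobFromDisk(historyId, hash, stat.size, fsPath, function (err) {
              if (err) return callback(err)
              // 4. Upload to the filestore as before, now passing the hash along
              FileStoreHandler._doUploadFileFromDisk(
                projectId,
                { ...fileArgs, hash },
                fsPath,
                function (err, result) {
                  if (err) return callback(err)
                  callback(null, result.url, result.fileRef)
                }
              )
            })
          })
        })
      })
    }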

View file

@@ -5,6 +5,8 @@ const request = require('request')
const settings = require('@overleaf/settings')
const Async = require('async')
const FileHashManager = require('./FileHashManager')
const HistoryManager = require('../History/HistoryManager')
const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
const { File } = require('../../models/File')
const Errors = require('../Errors/Errors')
const OError = require('@overleaf/o-error')
@@ -17,6 +19,32 @@ const FileStoreHandler = {
RETRY_ATTEMPTS: 3,
uploadFileFromDisk(projectId, fileArgs, fsPath, callback) {
// Look up the history id for the project if we don't have it already
ProjectDetailsHandler.getDetails(projectId, function (err, project) {
if (err) {
return callback(err)
}
const historyId = project.overleaf?.history?.id
if (!historyId) {
return callback(new OError('missing history id'))
}
FileStoreHandler.uploadFileFromDiskWithHistoryId(
projectId,
historyId,
fileArgs,
fsPath,
callback
)
})
},
uploadFileFromDiskWithHistoryId(
projectId,
historyId,
fileArgs,
fsPath,
callback
) {
fs.lstat(fsPath, function (err, stat) {
if (err) {
logger.warn({ err, projectId, fileArgs, fsPath }, 'error stating file')
@@ -36,76 +64,93 @@ const FileStoreHandler = {
)
return callback(new Error('can not upload symlink'))
}
Async.retry(
FileStoreHandler.RETRY_ATTEMPTS,
(cb, results) =>
FileStoreHandler._doUploadFileFromDisk(
projectId,
fileArgs,
fsPath,
cb
),
function (err, result) {
if (err) {
OError.tag(err, 'Error uploading file, retries failed', {
projectId,
fileArgs,
})
return callback(err)
}
callback(err, result.url, result.fileRef)
FileHashManager.computeHash(fsPath, function (err, hash) {
if (err) {
return callback(err)
}
)
Async.retry(
FileStoreHandler.RETRY_ATTEMPTS,
cb =>
HistoryManager.uploadBlobFromDisk(
historyId,
hash,
stat.size,
fsPath,
cb
),
function (err) {
if (err) {
return callback(err)
}
fileArgs = { ...fileArgs, hash }
Async.retry(
FileStoreHandler.RETRY_ATTEMPTS,
(cb, results) =>
FileStoreHandler._doUploadFileFromDisk(
projectId,
fileArgs,
fsPath,
cb
),
function (err, result) {
if (err) {
OError.tag(err, 'Error uploading file, retries failed', {
projectId,
fileArgs,
})
return callback(err)
}
callback(err, result.url, result.fileRef)
}
)
}
)
})
})
},
_doUploadFileFromDisk(projectId, fileArgs, fsPath, callback) {
const callbackOnce = _.once(callback)
FileHashManager.computeHash(fsPath, function (err, hashValue) {
if (err) {
return callbackOnce(err)
const fileRef = new File(fileArgs)
const fileId = fileRef._id
const readStream = fs.createReadStream(fsPath)
readStream.on('error', function (err) {
logger.warn(
{ err, projectId, fileId, fsPath },
'something went wrong on the read stream of uploadFileFromDisk'
)
callbackOnce(err)
})
readStream.on('open', function () {
const url = FileStoreHandler._buildUrl(projectId, fileId)
const opts = {
method: 'post',
uri: url,
timeout: FIVE_MINS_IN_MS,
headers: {
'X-File-Hash-From-Web': fileArgs.hash,
}, // send the hash to the filestore as a custom header so it can be checked
}
const fileRef = new File(Object.assign({}, fileArgs, { hash: hashValue }))
const fileId = fileRef._id
const readStream = fs.createReadStream(fsPath)
readStream.on('error', function (err) {
const writeStream = request(opts)
writeStream.on('error', function (err) {
logger.warn(
{ err, projectId, fileId, fsPath },
'something went wrong on the read stream of uploadFileFromDisk'
'something went wrong on the write stream of uploadFileFromDisk'
)
callbackOnce(err)
})
readStream.on('open', function () {
const url = FileStoreHandler._buildUrl(projectId, fileId)
const opts = {
method: 'post',
uri: url,
timeout: FIVE_MINS_IN_MS,
headers: {
'X-File-Hash-From-Web': hashValue,
}, // send the hash to the filestore as a custom header so it can be checked
}
const writeStream = request(opts)
writeStream.on('error', function (err) {
logger.warn(
{ err, projectId, fileId, fsPath },
'something went wrong on the write stream of uploadFileFromDisk'
writeStream.on('response', function (response) {
if (![200, 201].includes(response.statusCode)) {
const err = new OError(
`non-ok response from filestore for upload: ${response.statusCode}`,
{ statusCode: response.statusCode }
)
callbackOnce(err)
})
writeStream.on('response', function (response) {
if (![200, 201].includes(response.statusCode)) {
err = new OError(
`non-ok response from filestore for upload: ${response.statusCode}`,
{ statusCode: response.statusCode }
)
return callbackOnce(err)
}
callbackOnce(null, { url, fileRef })
}) // have to pass back an object because async.retry only accepts a single result argument
readStream.pipe(writeStream)
})
return callbackOnce(err)
}
callbackOnce(null, { url, fileRef })
}) // have to pass back an object because async.retry only accepts a single result argument
readStream.pipe(writeStream)
})
},
@@ -258,5 +303,6 @@ module.exports = FileStoreHandler
module.exports.promises = promisifyAll(FileStoreHandler, {
multiResult: {
uploadFileFromDisk: ['url', 'fileRef'],
uploadFileFromDiskWithHistoryId: ['url', 'fileRef'],
},
})
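
Because the new method is listed in the multiResult map above, the promisified variant resolves to an object with url and fileRef properties (the ProjectUploadManager test further down stubs exactly this shape). A minimal usage sketch; the file name and path are placeholders:

    // Illustrative call only; projectId and historyId as in the callback flow above.
    const { url, fileRef } =
      await FileStoreHandler.promises.uploadFileFromDiskWithHistoryId(
        projectId,
        historyId,
        { name: 'figure.png' }, // placeholder file args
        '/tmp/figure.png' // placeholder path on disk
      )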

View file

@@ -1,5 +1,6 @@
const { callbackify } = require('util')
const { fetchJson, fetchNothing } = require('@overleaf/fetch-utils')
const fs = require('fs')
const settings = require('@overleaf/settings')
const OError = require('@overleaf/o-error')
const UserGetter = require('../User/UserGetter')
@@ -113,6 +114,22 @@ async function _deleteProjectInFullProjectHistory(historyId) {
}
}
async function uploadBlobFromDisk(historyId, hash, byteLength, fsPath) {
const outStream = fs.createReadStream(fsPath)
const url = `${settings.apis.v1_history.url}/projects/${historyId}/blobs/${hash}`
await fetchNothing(url, {
method: 'PUT',
body: outStream,
headers: { 'Content-Length': byteLength }, // add the content length to work around problems with chunked encoding in node 18
signal: AbortSignal.timeout(60 * 1000),
basicAuth: {
user: settings.apis.v1_history.user,
password: settings.apis.v1_history.pass,
},
})
}
/**
* Warning: Don't use this method for large projects. It will eagerly load all
* the history data and apply all operations.
@@ -265,6 +282,7 @@ module.exports = {
deleteProjectHistory: callbackify(deleteProjectHistory),
injectUserDetails: callbackify(injectUserDetails),
getCurrentContent: callbackify(getCurrentContent),
uploadBlobFromDisk: callbackify(uploadBlobFromDisk),
promises: {
initializeProject,
flushProject,
@@ -274,5 +292,6 @@ module.exports = {
deleteProjectHistory,
getCurrentContent,
getContentAtVersion,
uploadBlobFromDisk,
},
}
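
uploadBlobFromDisk is exported both in callback style (via callbackify) and under promises, so callers can use either. A brief usage sketch with placeholder values:

    // Placeholder arguments: the hash comes from FileHashManager and the byte
    // length from fs.lstat, as in the FileStoreHandler changes above.
    await HistoryManager.promises.uploadBlobFromDisk(
      historyId,
      hash,
      stat.size, // sent as Content-Length to avoid chunked encoding issues
      fsPath
    )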

View file

@@ -15,6 +15,7 @@ const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
const ProjectDeleter = require('../Project/ProjectDeleter')
const TpdsProjectFlusher = require('../ThirdPartyDataStore/TpdsProjectFlusher')
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
module.exports = {
createProjectFromZipArchive: callbackify(createProjectFromZipArchive),
@@ -124,7 +125,7 @@ async function _initializeProjectWithZipContents(
const importEntries =
await FileSystemImportManager.promises.importDir(topLevelDir)
const { fileEntries, docEntries } = await _createEntriesFromImports(
project._id,
project,
importEntries
)
const projectVersion =
@@ -141,14 +142,14 @@ async function _initializeProjectWithZipContents(
await TpdsProjectFlusher.promises.flushProjectToTpds(project._id)
}
async function _createEntriesFromImports(projectId, importEntries) {
async function _createEntriesFromImports(project, importEntries) {
const fileEntries = []
const docEntries = []
for (const importEntry of importEntries) {
switch (importEntry.type) {
case 'doc': {
const docEntry = await _createDoc(
projectId,
project,
importEntry.projectPath,
importEntry.lines
)
@@ -157,7 +158,7 @@ async function _createEntriesFromImports(projectId, importEntries) {
}
case 'file': {
const fileEntry = await _createFile(
projectId,
project,
importEntry.projectPath,
importEntry.fsPath
)
@@ -172,7 +173,8 @@ async function _createEntriesFromImports(projectId, importEntries) {
return { fileEntries, docEntries }
}
async function _createDoc(projectId, projectPath, docLines) {
async function _createDoc(project, projectPath, docLines) {
const projectId = project._id
const docName = Path.basename(projectPath)
const doc = new Doc({ name: docName })
await DocstoreManager.promises.updateDoc(
@@ -185,13 +187,20 @@ async function _createDoc(projectId, projectPath, docLines) {
return { doc, path: projectPath, docLines: docLines.join('\n') }
}
async function _createFile(projectId, projectPath, fsPath) {
async function _createFile(project, projectPath, fsPath) {
const projectId = project._id
const historyId = project.overleaf?.history?.id
if (!historyId) {
throw new OError('missing history id')
}
const fileName = Path.basename(projectPath)
const { fileRef, url } = await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
{ name: fileName },
fsPath
)
const { fileRef, url } =
await FileStoreHandler.promises.uploadFileFromDiskWithHistoryId(
projectId,
historyId,
{ name: fileName },
fsPath
)
return { file: fileRef, path: projectPath, url }
}
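
_createFile (and _createDoc) now receive the whole project document rather than just its id, so the history id can be read from project.overleaf.history.id and a missing id fails fast. A caller that only holds a project id would have to look the project up first; a hypothetical sketch, assuming ProjectDetailsHandler also exposes a promisified getDetails:

    // Hypothetical caller; mirrors the lookup FileStoreHandler.uploadFileFromDisk
    // performs in the diff above.
    const project = await ProjectDetailsHandler.promises.getDetails(projectId)
    const historyId = project.overleaf?.history?.id
    if (!historyId) {
      throw new OError('missing history id')
    }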

View file

@@ -78,6 +78,10 @@ class MockV1HistoryApi extends AbstractMockApi {
this.app.delete('/api/projects/:project_id', (req, res, next) => {
res.sendStatus(204)
})
this.app.put('/api/projects/:project_id/blobs/:hash', (req, res, next) => {
res.sendStatus(204)
})
}
}

View file

@@ -17,6 +17,7 @@ describe('FileStoreHandler', function () {
isDirectory() {
return false
},
size: this.fileSize,
}),
}
this.writeStream = {
@@ -38,6 +39,9 @@ describe('FileStoreHandler', function () {
this.fileArgs = { name: 'upload-filename' }
this.fileId = 'file_id_here'
this.projectId = '1312312312'
this.historyId = 123
this.fileSize = 999
this.hashValue = '2aae6c35c94fcfb415dbe95f408b9ce91ee846ed'
this.fsPath = 'uploads/myfile.eps'
this.getFileUrl = (projectId, fileId) =>
`${this.filestoreUrl}/project/${projectId}/file/${fileId}`
@@ -56,10 +60,21 @@ describe('FileStoreHandler', function () {
this.FileHashManager = {
computeHash: sinon.stub().callsArgWith(1, null, this.hashValue),
}
this.HistoryManager = {
uploadBlobFromDisk: sinon.stub().callsArg(4),
}
this.ProjectDetailsHandler = {
getDetails: sinon.stub().callsArgWith(1, null, {
overleaf: { history: { id: this.historyId } },
}),
}
this.handler = SandboxedModule.require(MODULE_PATH, {
requires: {
'@overleaf/settings': this.settings,
request: this.request,
'../History/HistoryManager': this.HistoryManager,
'../Project/ProjectDetailsHandler': this.ProjectDetailsHandler,
'./FileHashManager': this.FileHashManager,
// FIXME: need to stub File object here
'../../models/File': {
@@ -75,6 +90,77 @@ describe('FileStoreHandler', function () {
this.request.returns(this.writeStream)
})
it('should get the project details', function (done) {
this.fs.createReadStream.returns({
pipe() {},
on(type, cb) {
if (type === 'open') {
cb()
}
},
})
this.handler.uploadFileFromDisk(
this.projectId,
this.fileArgs,
this.fsPath,
() => {
this.ProjectDetailsHandler.getDetails
.calledWith(this.projectId)
.should.equal(true)
done()
}
)
})
it('should compute the file hash', function (done) {
this.fs.createReadStream.returns({
pipe() {},
on(type, cb) {
if (type === 'open') {
cb()
}
},
})
this.handler.uploadFileFromDisk(
this.projectId,
this.fileArgs,
this.fsPath,
() => {
this.FileHashManager.computeHash
.calledWith(this.fsPath)
.should.equal(true)
done()
}
)
})
it('should upload the file to the history store as a blob', function (done) {
this.fs.createReadStream.returns({
pipe() {},
on(type, cb) {
if (type === 'open') {
cb()
}
},
})
this.handler.uploadFileFromDisk(
this.projectId,
this.fileArgs,
this.fsPath,
() => {
this.HistoryManager.uploadBlobFromDisk
.calledWith(
this.historyId,
this.hashValue,
this.fileSize,
this.fsPath
)
.should.equal(true)
done()
}
)
})
it('should create read stream', function (done) {
this.fs.createReadStream.returns({
pipe() {},

View file

@@ -94,7 +94,7 @@ describe('ProjectUploadManager', function () {
}
this.FileStoreHandler = {
promises: {
uploadFileFromDisk: sinon
uploadFileFromDiskWithHistoryId: sinon
.stub()
.resolves({ fileRef: this.file, url: this.fileStoreUrl }),
},