diff --git a/services/web/app/src/Features/FileStore/FileStoreHandler.js b/services/web/app/src/Features/FileStore/FileStoreHandler.js
index 18ae9aface..af75f71d84 100644
--- a/services/web/app/src/Features/FileStore/FileStoreHandler.js
+++ b/services/web/app/src/Features/FileStore/FileStoreHandler.js
@@ -5,6 +5,8 @@ const request = require('request')
 const settings = require('@overleaf/settings')
 const Async = require('async')
 const FileHashManager = require('./FileHashManager')
+const HistoryManager = require('../History/HistoryManager')
+const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
 const { File } = require('../../models/File')
 const Errors = require('../Errors/Errors')
 const OError = require('@overleaf/o-error')
@@ -17,6 +19,43 @@ const FileStoreHandler = {
   RETRY_ATTEMPTS: 3,
 
   uploadFileFromDisk(projectId, fileArgs, fsPath, callback) {
+    // Look up the history id for the project if we don't have it already
+    ProjectDetailsHandler.getDetails(projectId, function (err, project) {
+      if (err) {
+        return callback(err)
+      }
+      const historyId = project.overleaf?.history?.id
+      if (!historyId) {
+        return callback(new OError('missing history id', { projectId }))
+      }
+      FileStoreHandler.uploadFileFromDiskWithHistoryId(
+        projectId,
+        historyId,
+        fileArgs,
+        fsPath,
+        callback
+      )
+    })
+  },
+
+  /**
+   * Upload a file from disk when the project's history id is already known.
+   * The file is hashed, sent to the history store as a blob, and then sent
+   * to the filestore; each upload is attempted up to RETRY_ATTEMPTS times.
+   *
+   * @param projectId
+   * @param historyId - id used to address the project in the history store
+   * @param {Object} fileArgs - attributes for the new File (hash is added)
+   * @param {string} fsPath - path of the file on disk
+   * @param {Function} callback - called with (err, url, fileRef)
+   */
+  uploadFileFromDiskWithHistoryId(
+    projectId,
+    historyId,
+    fileArgs,
+    fsPath,
+    callback
+  ) {
     fs.lstat(fsPath, function (err, stat) {
       if (err) {
         logger.warn({ err, projectId, fileArgs, fsPath }, 'error stating file')
@@ -36,76 +75,98 @@ const FileStoreHandler = {
         )
         return callback(new Error('can not upload symlink'))
       }
-      Async.retry(
-        FileStoreHandler.RETRY_ATTEMPTS,
-        (cb, results) =>
-          FileStoreHandler._doUploadFileFromDisk(
-            projectId,
-            fileArgs,
-            fsPath,
-            cb
-          ),
-        function (err, result) {
-          if (err) {
-            OError.tag(err, 'Error uploading file, retries failed', {
-              projectId,
-              fileArgs,
-            })
-            return callback(err)
-          }
-          callback(err, result.url, result.fileRef)
+      FileHashManager.computeHash(fsPath, function (err, hash) {
+        if (err) {
+          return callback(err)
         }
-      )
+        Async.retry(
+          FileStoreHandler.RETRY_ATTEMPTS,
+          cb =>
+            HistoryManager.uploadBlobFromDisk(
+              historyId,
+              hash,
+              stat.size,
+              fsPath,
+              cb
+            ),
+          function (err) {
+            if (err) {
+              OError.tag(err, 'Error uploading blob, retries failed', {
+                projectId,
+                historyId,
+                hash,
+              })
+              return callback(err)
+            }
+            fileArgs = { ...fileArgs, hash }
+            Async.retry(
+              FileStoreHandler.RETRY_ATTEMPTS,
+              (cb, results) =>
+                FileStoreHandler._doUploadFileFromDisk(
+                  projectId,
+                  fileArgs,
+                  fsPath,
+                  cb
+                ),
+              function (err, result) {
+                if (err) {
+                  OError.tag(err, 'Error uploading file, retries failed', {
+                    projectId,
+                    fileArgs,
+                  })
+                  return callback(err)
+                }
+                callback(err, result.url, result.fileRef)
+              }
+            )
+          }
+        )
+      })
     })
   },
 
   _doUploadFileFromDisk(projectId, fileArgs, fsPath, callback) {
     const callbackOnce = _.once(callback)
 
-    FileHashManager.computeHash(fsPath, function (err, hashValue) {
-      if (err) {
-        return callbackOnce(err)
+    const fileRef = new File(fileArgs)
+    const fileId = fileRef._id
+    const readStream = fs.createReadStream(fsPath)
+    readStream.on('error', function (err) {
+      logger.warn(
+        { err, projectId, fileId, fsPath },
+        'something went wrong on the read stream of uploadFileFromDisk'
+      )
+      callbackOnce(err)
+    })
+    readStream.on('open', function () {
+      const url = FileStoreHandler._buildUrl(projectId, fileId)
+      const opts = {
+        method: 'post',
+        uri: url,
+        timeout: FIVE_MINS_IN_MS,
+        headers: {
+          'X-File-Hash-From-Web': fileArgs.hash,
+        }, // send the hash to the filestore as a custom header so it can be checked
       }
-      const fileRef = new File(Object.assign({}, fileArgs, { hash: hashValue }))
-      const fileId = fileRef._id
-      const readStream = fs.createReadStream(fsPath)
-      readStream.on('error', function (err) {
+      const writeStream = request(opts)
+      writeStream.on('error', function (err) {
         logger.warn(
           { err, projectId, fileId, fsPath },
-          'something went wrong on the read stream of uploadFileFromDisk'
+          'something went wrong on the write stream of uploadFileFromDisk'
         )
         callbackOnce(err)
       })
-      readStream.on('open', function () {
-        const url = FileStoreHandler._buildUrl(projectId, fileId)
-        const opts = {
-          method: 'post',
-          uri: url,
-          timeout: FIVE_MINS_IN_MS,
-          headers: {
-            'X-File-Hash-From-Web': hashValue,
-          }, // send the hash to the filestore as a custom header so it can be checked
-        }
-        const writeStream = request(opts)
-        writeStream.on('error', function (err) {
-          logger.warn(
-            { err, projectId, fileId, fsPath },
-            'something went wrong on the write stream of uploadFileFromDisk'
+      writeStream.on('response', function (response) {
+        if (![200, 201].includes(response.statusCode)) {
+          const err = new OError(
+            `non-ok response from filestore for upload: ${response.statusCode}`,
+            { statusCode: response.statusCode }
           )
-          callbackOnce(err)
-        })
-        writeStream.on('response', function (response) {
-          if (![200, 201].includes(response.statusCode)) {
-            err = new OError(
-              `non-ok response from filestore for upload: ${response.statusCode}`,
-              { statusCode: response.statusCode }
-            )
-            return callbackOnce(err)
-          }
-          callbackOnce(null, { url, fileRef })
-        }) // have to pass back an object because async.retry only accepts a single result argument
-        readStream.pipe(writeStream)
-      })
+          return callbackOnce(err)
+        }
+        callbackOnce(null, { url, fileRef })
+      }) // have to pass back an object because async.retry only accepts a single result argument
+      readStream.pipe(writeStream)
     })
   },
 
@@ -258,5 +319,6 @@ module.exports = FileStoreHandler
 module.exports.promises = promisifyAll(FileStoreHandler, {
   multiResult: {
     uploadFileFromDisk: ['url', 'fileRef'],
+    uploadFileFromDiskWithHistoryId: ['url', 'fileRef'],
   },
 })
diff --git a/services/web/app/src/Features/History/HistoryManager.js b/services/web/app/src/Features/History/HistoryManager.js
index cbf34dd870..03c4bc960b 100644
--- a/services/web/app/src/Features/History/HistoryManager.js
+++ b/services/web/app/src/Features/History/HistoryManager.js
@@ -1,5 +1,6 @@
 const { callbackify } = require('util')
 const { fetchJson, fetchNothing } = require('@overleaf/fetch-utils')
+const fs = require('fs')
 const settings = require('@overleaf/settings')
 const OError = require('@overleaf/o-error')
 const UserGetter = require('../User/UserGetter')
@@ -113,6 +114,30 @@ async function _deleteProjectInFullProjectHistory(historyId) {
   }
 }
 
+/**
+ * Stream a file from disk to the history store as a blob of a project.
+ *
+ * @param historyId - id of the project in the history store
+ * @param {string} hash - hash of the file contents; names the blob in the URL
+ * @param {number} byteLength - size of the file, sent as Content-Length
+ * @param {string} fsPath - path of the file on disk
+ */
+async function uploadBlobFromDisk(historyId, hash, byteLength, fsPath) {
+  const outStream = fs.createReadStream(fsPath)
+
+  const url = `${settings.apis.v1_history.url}/projects/${historyId}/blobs/${hash}`
+  await fetchNothing(url, {
+    method: 'PUT',
+    body: outStream,
+    headers: { 'Content-Length': byteLength }, // add the content length to work around problems with chunked encoding in node 18
+    signal: AbortSignal.timeout(60 * 1000),
+    basicAuth: {
+      user: settings.apis.v1_history.user,
+      password: settings.apis.v1_history.pass,
+    },
+  })
+}
+
 /**
  * Warning: Don't use this method for large projects. It will eagerly load all
  * the history data and apply all operations.
@@ -265,6 +290,7 @@ module.exports = {
   deleteProjectHistory: callbackify(deleteProjectHistory),
   injectUserDetails: callbackify(injectUserDetails),
   getCurrentContent: callbackify(getCurrentContent),
+  uploadBlobFromDisk: callbackify(uploadBlobFromDisk),
   promises: {
     initializeProject,
     flushProject,
@@ -274,5 +300,6 @@ module.exports = {
     deleteProjectHistory,
     getCurrentContent,
     getContentAtVersion,
+    uploadBlobFromDisk,
   },
 }
diff --git a/services/web/app/src/Features/Uploads/ProjectUploadManager.js b/services/web/app/src/Features/Uploads/ProjectUploadManager.js
index 6cd095f134..8af189254f 100644
--- a/services/web/app/src/Features/Uploads/ProjectUploadManager.js
+++ b/services/web/app/src/Features/Uploads/ProjectUploadManager.js
@@ -15,6 +15,7 @@ const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
 const ProjectDeleter = require('../Project/ProjectDeleter')
 const TpdsProjectFlusher = require('../ThirdPartyDataStore/TpdsProjectFlusher')
 const logger = require('@overleaf/logger')
+const OError = require('@overleaf/o-error')
 
 module.exports = {
   createProjectFromZipArchive: callbackify(createProjectFromZipArchive),
@@ -124,7 +125,7 @@ async function _initializeProjectWithZipContents(
   const importEntries =
     await FileSystemImportManager.promises.importDir(topLevelDir)
   const { fileEntries, docEntries } = await _createEntriesFromImports(
-    project._id,
+    project,
     importEntries
   )
   const projectVersion =
@@ -141,14 +142,14 @@ async function _initializeProjectWithZipContents(
   await TpdsProjectFlusher.promises.flushProjectToTpds(project._id)
 }
 
-async function _createEntriesFromImports(projectId, importEntries) {
+async function _createEntriesFromImports(project, importEntries) {
   const fileEntries = []
   const docEntries = []
   for (const importEntry of importEntries) {
     switch (importEntry.type) {
       case 'doc': {
         const docEntry = await _createDoc(
-          projectId,
+          project,
           importEntry.projectPath,
           importEntry.lines
         )
@@ -157,7 +158,7 @@ async function _createEntriesFromImports(projectId, importEntries) {
       }
       case 'file': {
         const fileEntry = await _createFile(
-          projectId,
+          project,
           importEntry.projectPath,
           importEntry.fsPath
         )
@@ -172,7 +173,8 @@ async function _createEntriesFromImports(projectId, importEntries) {
   return { fileEntries, docEntries }
 }
 
-async function _createDoc(projectId, projectPath, docLines) {
+async function _createDoc(project, projectPath, docLines) {
+  const projectId = project._id
   const docName = Path.basename(projectPath)
   const doc = new Doc({ name: docName })
   await DocstoreManager.promises.updateDoc(
@@ -185,13 +187,20 @@ async function _createDoc(projectId, projectPath, docLines) {
   return { doc, path: projectPath, docLines: docLines.join('\n') }
 }
 
-async function _createFile(projectId, projectPath, fsPath) {
+async function _createFile(project, projectPath, fsPath) {
+  const projectId = project._id
+  const historyId = project.overleaf?.history?.id
+  if (!historyId) {
+    throw new OError('missing history id', { projectId })
+  }
   const fileName = Path.basename(projectPath)
-  const { fileRef, url } = await FileStoreHandler.promises.uploadFileFromDisk(
-    projectId,
-    { name: fileName },
-    fsPath
-  )
+  const { fileRef, url } =
+    await FileStoreHandler.promises.uploadFileFromDiskWithHistoryId(
+      projectId,
+      historyId,
+      { name: fileName },
+      fsPath
+    )
   return { file: fileRef, path: projectPath, url }
 }
 
diff --git a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.js b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.js
index 0b8e4b06c8..c4ec22fdc0 100644
--- a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.js
+++ b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.js
@@ -78,6 +78,10 @@ class MockV1HistoryApi extends AbstractMockApi {
     this.app.delete('/api/projects/:project_id', (req, res, next) => {
       res.sendStatus(204)
     })
+
+    this.app.put('/api/projects/:project_id/blobs/:hash', (req, res, next) => {
+      res.sendStatus(204)
+    })
   }
 }
 
diff --git a/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js b/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js
index 52f2ac3e40..f7cb98a052 100644
--- a/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js
+++ b/services/web/test/unit/src/FileStore/FileStoreHandlerTests.js
@@ -17,6 +17,7 @@ describe('FileStoreHandler', function () {
         isDirectory() {
           return false
         },
+        size: 999, // literal on purpose: this.fileSize (same value) is only assigned further down, after this stub is built
       }),
     }
     this.writeStream = {
@@ -38,6 +39,9 @@ describe('FileStoreHandler', function () {
     this.fileArgs = { name: 'upload-filename' }
     this.fileId = 'file_id_here'
     this.projectId = '1312312312'
+    this.historyId = 123
+    this.fileSize = 999
+    this.hashValue = '2aae6c35c94fcfb415dbe95f408b9ce91ee846ed'
     this.fsPath = 'uploads/myfile.eps'
     this.getFileUrl = (projectId, fileId) =>
       `${this.filestoreUrl}/project/${projectId}/file/${fileId}`
@@ -56,10 +60,21 @@ describe('FileStoreHandler', function () {
     this.FileHashManager = {
       computeHash: sinon.stub().callsArgWith(1, null, this.hashValue),
     }
+    this.HistoryManager = {
+      uploadBlobFromDisk: sinon.stub().callsArg(4),
+    }
+    this.ProjectDetailsHandler = {
+      getDetails: sinon.stub().callsArgWith(1, null, {
+        overleaf: { history: { id: this.historyId } },
+      }),
+    }
+
     this.handler = SandboxedModule.require(MODULE_PATH, {
       requires: {
         '@overleaf/settings': this.settings,
         request: this.request,
+        '../History/HistoryManager': this.HistoryManager,
+        '../Project/ProjectDetailsHandler': this.ProjectDetailsHandler,
         './FileHashManager': this.FileHashManager,
         // FIXME: need to stub File object here
         '../../models/File': {
@@ -75,6 +90,77 @@ describe('FileStoreHandler', function () {
       this.request.returns(this.writeStream)
     })
 
+    it('should get the project details', function (done) {
+      this.fs.createReadStream.returns({
+        pipe() {},
+        on(type, cb) {
+          if (type === 'open') {
+            cb()
+          }
+        },
+      })
+      this.handler.uploadFileFromDisk(
+        this.projectId,
+        this.fileArgs,
+        this.fsPath,
+        () => {
+          this.ProjectDetailsHandler.getDetails
+            .calledWith(this.projectId)
+            .should.equal(true)
+          done()
+        }
+      )
+    })
+
+    it('should compute the file hash', function (done) {
+      this.fs.createReadStream.returns({
+        pipe() {},
+        on(type, cb) {
+          if (type === 'open') {
+            cb()
+          }
+        },
+      })
+      this.handler.uploadFileFromDisk(
+        this.projectId,
+        this.fileArgs,
+        this.fsPath,
+        () => {
+          this.FileHashManager.computeHash
+            .calledWith(this.fsPath)
+            .should.equal(true)
+          done()
+        }
+      )
+    })
+
+    it('should upload the file to the history store as a blob', function (done) {
+      this.fs.createReadStream.returns({
+        pipe() {},
+        on(type, cb) {
+          if (type === 'open') {
+            cb()
+          }
+        },
+      })
+      this.handler.uploadFileFromDisk(
+        this.projectId,
+        this.fileArgs,
+        this.fsPath,
+        () => {
+          this.HistoryManager.uploadBlobFromDisk
+            .calledWith(
+              this.historyId,
+              this.hashValue,
+              this.fileSize,
+              this.fsPath
+            )
+            .should.equal(true)
+          done()
+        }
+      )
+    })
+
     it('should create read stream', function (done) {
       this.fs.createReadStream.returns({
         pipe() {},
diff --git a/services/web/test/unit/src/Uploads/ProjectUploadManagerTests.js b/services/web/test/unit/src/Uploads/ProjectUploadManagerTests.js
index e996259095..abdec03d11 100644
--- a/services/web/test/unit/src/Uploads/ProjectUploadManagerTests.js
+++ b/services/web/test/unit/src/Uploads/ProjectUploadManagerTests.js
@@ -94,7 +94,7 @@ describe('ProjectUploadManager', function () {
     }
     this.FileStoreHandler = {
       promises: {
-        uploadFileFromDisk: sinon
+        uploadFileFromDiskWithHistoryId: sinon
           .stub()
           .resolves({ fileRef: this.file, url: this.fileStoreUrl }),
       },