mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-20 07:23:40 +00:00
Merge pull request #22208 from overleaf/jpa-clsi-hash
[misc] clsi: read files from history-v1 with fallback to filestore GitOrigin-RevId: c54bb128780198c14e7a63818f39fad62ce65d4e
This commit is contained in:
parent
f6cbcc51d8
commit
d19c5e236f
16 changed files with 431 additions and 28 deletions
|
@ -169,11 +169,15 @@ function _parseResource(resource) {
|
|||
if (resource.url != null && typeof resource.url !== 'string') {
|
||||
throw new Error('url attribute should be a string')
|
||||
}
|
||||
if (resource.fallbackURL && typeof resource.fallbackURL !== 'string') {
|
||||
throw new Error('fallbackURL attribute should be a string')
|
||||
}
|
||||
|
||||
return {
|
||||
path: resource.path,
|
||||
modified,
|
||||
url: resource.url,
|
||||
fallbackURL: resource.fallbackURL,
|
||||
content: resource.content,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -333,6 +333,7 @@ module.exports = ResourceWriter = {
|
|||
return UrlCache.downloadUrlToFile(
|
||||
projectId,
|
||||
resource.url,
|
||||
resource.fallbackURL,
|
||||
path,
|
||||
resource.modified,
|
||||
function (err) {
|
||||
|
|
|
@ -47,14 +47,29 @@ async function createProjectDir(projectId) {
|
|||
await fs.promises.mkdir(getProjectDir(projectId), { recursive: true })
|
||||
}
|
||||
|
||||
async function downloadUrlToFile(projectId, url, destPath, lastModified) {
|
||||
async function downloadUrlToFile(
|
||||
projectId,
|
||||
url,
|
||||
fallbackURL,
|
||||
destPath,
|
||||
lastModified
|
||||
) {
|
||||
const cachePath = getCachePath(projectId, url, lastModified)
|
||||
try {
|
||||
const timer = new Metrics.Timer('url_cache', {
|
||||
status: 'cache-hit',
|
||||
path: 'copy',
|
||||
})
|
||||
await fs.promises.copyFile(cachePath, destPath)
|
||||
try {
|
||||
await fs.promises.copyFile(cachePath, destPath)
|
||||
} catch (err) {
|
||||
if (err.code === 'ENOENT' && fallbackURL) {
|
||||
const fallbackPath = getCachePath(projectId, fallbackURL, lastModified)
|
||||
await fs.promises.copyFile(fallbackPath, destPath)
|
||||
} else {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
// the metric is only updated if the file is present in the cache
|
||||
timer.done()
|
||||
return
|
||||
|
@ -70,7 +85,7 @@ async function downloadUrlToFile(projectId, url, destPath, lastModified) {
|
|||
path: 'download',
|
||||
})
|
||||
try {
|
||||
await download(url, cachePath)
|
||||
await download(url, fallbackURL, cachePath)
|
||||
} finally {
|
||||
timer.done()
|
||||
}
|
||||
|
@ -86,13 +101,17 @@ async function downloadUrlToFile(projectId, url, destPath, lastModified) {
|
|||
}
|
||||
}
|
||||
|
||||
async function download(url, cachePath) {
|
||||
async function download(url, fallbackURL, cachePath) {
|
||||
let pending = PENDING_DOWNLOADS.get(cachePath)
|
||||
if (pending) {
|
||||
return pending
|
||||
}
|
||||
|
||||
pending = UrlFetcher.promises.pipeUrlToFileWithRetry(url, cachePath)
|
||||
pending = UrlFetcher.promises.pipeUrlToFileWithRetry(
|
||||
url,
|
||||
fallbackURL,
|
||||
cachePath
|
||||
)
|
||||
PENDING_DOWNLOADS.set(cachePath, pending)
|
||||
try {
|
||||
await pending
|
||||
|
|
|
@ -5,6 +5,7 @@ const {
|
|||
CustomHttpAgent,
|
||||
CustomHttpsAgent,
|
||||
fetchStream,
|
||||
RequestFailedError,
|
||||
} = require('@overleaf/fetch-utils')
|
||||
const { URL } = require('node:url')
|
||||
const { pipeline } = require('node:stream/promises')
|
||||
|
@ -14,7 +15,7 @@ const MAX_CONNECT_TIME = 1000
|
|||
const httpAgent = new CustomHttpAgent({ connectTimeout: MAX_CONNECT_TIME })
|
||||
const httpsAgent = new CustomHttpsAgent({ connectTimeout: MAX_CONNECT_TIME })
|
||||
|
||||
async function pipeUrlToFileWithRetry(url, filePath) {
|
||||
async function pipeUrlToFileWithRetry(url, fallbackURL, filePath) {
|
||||
let remainingAttempts = 3
|
||||
let lastErr
|
||||
while (remainingAttempts-- > 0) {
|
||||
|
@ -22,7 +23,7 @@ async function pipeUrlToFileWithRetry(url, filePath) {
|
|||
path: lastErr ? ' retry' : 'fetch',
|
||||
})
|
||||
try {
|
||||
await pipeUrlToFile(url, filePath)
|
||||
await pipeUrlToFile(url, fallbackURL, filePath)
|
||||
timer.done({ status: 'success' })
|
||||
return
|
||||
} catch (err) {
|
||||
|
@ -37,7 +38,7 @@ async function pipeUrlToFileWithRetry(url, filePath) {
|
|||
throw lastErr
|
||||
}
|
||||
|
||||
async function pipeUrlToFile(url, filePath) {
|
||||
async function pipeUrlToFile(url, fallbackURL, filePath) {
|
||||
const u = new URL(url)
|
||||
if (
|
||||
Settings.filestoreDomainOveride &&
|
||||
|
@ -45,21 +46,55 @@ async function pipeUrlToFile(url, filePath) {
|
|||
) {
|
||||
url = `${Settings.filestoreDomainOveride}${u.pathname}${u.search}`
|
||||
}
|
||||
if (fallbackURL) {
|
||||
const u2 = new URL(fallbackURL)
|
||||
if (
|
||||
Settings.filestoreDomainOveride &&
|
||||
u2.host !== Settings.apis.clsiPerf.host
|
||||
) {
|
||||
fallbackURL = `${Settings.filestoreDomainOveride}${u2.pathname}${u2.search}`
|
||||
}
|
||||
}
|
||||
|
||||
const stream = await fetchStream(url, {
|
||||
signal: AbortSignal.timeout(60 * 1000),
|
||||
// provide a function to get the agent for each request
|
||||
// as there may be multiple requests with different protocols
|
||||
// due to redirects.
|
||||
agent: _url => (_url.protocol === 'https:' ? httpsAgent : httpAgent),
|
||||
})
|
||||
let stream
|
||||
try {
|
||||
stream = await fetchStream(url, {
|
||||
signal: AbortSignal.timeout(60 * 1000),
|
||||
// provide a function to get the agent for each request
|
||||
// as there may be multiple requests with different protocols
|
||||
// due to redirects.
|
||||
agent: _url => (_url.protocol === 'https:' ? httpsAgent : httpAgent),
|
||||
})
|
||||
} catch (err) {
|
||||
if (
|
||||
fallbackURL &&
|
||||
err instanceof RequestFailedError &&
|
||||
err.response.status === 404
|
||||
) {
|
||||
stream = await fetchStream(fallbackURL, {
|
||||
signal: AbortSignal.timeout(60 * 1000),
|
||||
// provide a function to get the agent for each request
|
||||
// as there may be multiple requests with different protocols
|
||||
// due to redirects.
|
||||
agent: _url => (_url.protocol === 'https:' ? httpsAgent : httpAgent),
|
||||
})
|
||||
url = fallbackURL
|
||||
} else {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
const source = inferSource(url)
|
||||
Metrics.inc('url_source', 1, { path: source })
|
||||
|
||||
const atomicWrite = filePath + '~'
|
||||
try {
|
||||
const output = fs.createWriteStream(atomicWrite)
|
||||
await pipeline(stream, output)
|
||||
await fs.promises.rename(atomicWrite, filePath)
|
||||
Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten)
|
||||
Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten, {
|
||||
path: source,
|
||||
})
|
||||
} catch (err) {
|
||||
try {
|
||||
await fs.promises.unlink(atomicWrite)
|
||||
|
@ -68,6 +103,20 @@ async function pipeUrlToFile(url, filePath) {
|
|||
}
|
||||
}
|
||||
|
||||
const BUCKET_REGEX = /\/bucket\/([^/]+)\/key\//
|
||||
|
||||
/**
 * Derive a coarse source label from a download URL.
 *
 * Checks are applied in order and the first match wins:
 *  1. the URL contains the configured clsi-perf host  -> 'clsi-perf'
 *  2. the URL contains both '/project/' and '/file/'  -> 'user-files'
 *  3. the URL matches '/bucket/<name>/key/'           -> '<name>'
 *  4. anything else                                   -> 'unknown'
 *
 * @param {string} url
 * @return {string}
 */
function inferSource(url) {
  const isClsiPerf = url.includes(Settings.apis.clsiPerf.host)
  if (isClsiPerf) return 'clsi-perf'

  const isUserFile = url.includes('/project/') && url.includes('/file/')
  if (isUserFile) return 'user-files'

  // Only bucket-style URLs carry a '/key/' segment; everything else is unknown.
  if (!url.includes('/key/')) return 'unknown'

  const bucketMatch = BUCKET_REGEX.exec(url)
  return bucketMatch ? bucketMatch[1] : 'unknown'
}
|
||||
|
||||
module.exports.promises = {
|
||||
pipeUrlToFileWithRetry,
|
||||
}
|
||||
|
|
|
@ -14,6 +14,8 @@ const Path = require('node:path')
|
|||
const Client = require('./helpers/Client')
|
||||
const sinon = require('sinon')
|
||||
const ClsiApp = require('./helpers/ClsiApp')
|
||||
const request = require('request')
|
||||
const Settings = require('@overleaf/settings')
|
||||
|
||||
const Server = {
|
||||
run() {
|
||||
|
@ -35,6 +37,21 @@ const Server = {
|
|||
}
|
||||
})
|
||||
|
||||
app.get('/not-found', (req, res, next) => {
|
||||
this.getFile(req.url)
|
||||
res.status(404).end()
|
||||
})
|
||||
|
||||
app.get('/project/:projectId/file/:fileId', (req, res, next) => {
|
||||
this.getFile(req.url)
|
||||
return res.send(`${req.params.projectId}:${req.params.fileId}`)
|
||||
})
|
||||
|
||||
app.get('/bucket/:bucket/key/*', (req, res, next) => {
|
||||
this.getFile(req.url)
|
||||
return res.send(`${req.params.bucket}:${req.params[0]}`)
|
||||
})
|
||||
|
||||
app.get('/:random_id/*', (req, res, next) => {
|
||||
this.getFile(req.url)
|
||||
req.url = `/${req.params[0]}`
|
||||
|
@ -218,9 +235,24 @@ describe('Url Caching', function () {
|
|||
return Server.getFile.restore()
|
||||
})
|
||||
|
||||
return it('should not download the image again', function () {
|
||||
it('should not download the image again', function () {
|
||||
return Server.getFile.called.should.equal(false)
|
||||
})
|
||||
|
||||
it('should gather metrics', function (done) {
|
||||
request.get(`${Settings.apis.clsi.url}/metrics`, (err, res, body) => {
|
||||
if (err) return done(err)
|
||||
body
|
||||
.split('\n')
|
||||
.some(line => {
|
||||
return (
|
||||
line.startsWith('url_source') && line.includes('path="unknown"')
|
||||
)
|
||||
})
|
||||
.should.equal(true)
|
||||
done()
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('When an image is in the cache and the last modified date is advanced', function () {
|
||||
|
@ -391,7 +423,7 @@ describe('Url Caching', function () {
|
|||
})
|
||||
})
|
||||
|
||||
return describe('After clearing the cache', function () {
|
||||
describe('After clearing the cache', function () {
|
||||
before(function (done) {
|
||||
this.project_id = Client.randomId()
|
||||
this.file = `${Server.randomId()}/lion.png`
|
||||
|
@ -446,4 +478,140 @@ describe('Url Caching', function () {
|
|||
return Server.getFile.called.should.equal(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('fallbackURL', function () {
|
||||
describe('when the primary resource is available', function () {
|
||||
before(function (done) {
|
||||
this.project_id = Client.randomId()
|
||||
this.file = `/project/${Server.randomId()}/file/${Server.randomId()}`
|
||||
this.fallback = `/bucket/project-blobs/key/ab/cd/${Server.randomId()}`
|
||||
this.request = {
|
||||
resources: [
|
||||
{
|
||||
path: 'main.tex',
|
||||
content: `\
|
||||
\\documentclass{article}
|
||||
\\usepackage{graphicx}
|
||||
\\begin{document}
|
||||
\\includegraphics{lion.png}
|
||||
\\end{document}\
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: 'lion.png',
|
||||
url: `http://filestore${this.file}`,
|
||||
fallbackURL: `http://filestore${this.fallback}`,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
sinon.spy(Server, 'getFile')
|
||||
return ClsiApp.ensureRunning(() => {
|
||||
return Client.compile(
|
||||
this.project_id,
|
||||
this.request,
|
||||
(error, res, body) => {
|
||||
this.error = error
|
||||
this.res = res
|
||||
this.body = body
|
||||
return done()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
after(function () {
|
||||
return Server.getFile.restore()
|
||||
})
|
||||
|
||||
it('should download from the primary', function () {
|
||||
Server.getFile.calledWith(this.file).should.equal(true)
|
||||
})
|
||||
it('should not download from the fallback', function () {
|
||||
Server.getFile.calledWith(this.fallback).should.equal(false)
|
||||
})
|
||||
|
||||
it('should gather metrics', function (done) {
|
||||
request.get(`${Settings.apis.clsi.url}/metrics`, (err, res, body) => {
|
||||
if (err) return done(err)
|
||||
body
|
||||
.split('\n')
|
||||
.some(line => {
|
||||
return (
|
||||
line.startsWith('url_source') &&
|
||||
line.includes('path="user-files"')
|
||||
)
|
||||
})
|
||||
.should.equal(true)
|
||||
done()
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('when the primary resource is not available', function () {
|
||||
before(function (done) {
|
||||
this.project_id = Client.randomId()
|
||||
this.file = `/project/${Server.randomId()}/file/${Server.randomId()}`
|
||||
this.fallback = `/bucket/project-blobs/key/ab/cd/${Server.randomId()}`
|
||||
this.request = {
|
||||
resources: [
|
||||
{
|
||||
path: 'main.tex',
|
||||
content: `\
|
||||
\\documentclass{article}
|
||||
\\usepackage{graphicx}
|
||||
\\begin{document}
|
||||
\\includegraphics{lion.png}
|
||||
\\end{document}\
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: 'lion.png',
|
||||
url: `http://filestore/not-found`,
|
||||
fallbackURL: `http://filestore${this.fallback}`,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
sinon.spy(Server, 'getFile')
|
||||
return ClsiApp.ensureRunning(() => {
|
||||
return Client.compile(
|
||||
this.project_id,
|
||||
this.request,
|
||||
(error, res, body) => {
|
||||
this.error = error
|
||||
this.res = res
|
||||
this.body = body
|
||||
return done()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
after(function () {
|
||||
return Server.getFile.restore()
|
||||
})
|
||||
|
||||
it('should download from the fallback', function () {
|
||||
Server.getFile.calledWith(`/not-found`).should.equal(true)
|
||||
Server.getFile.calledWith(this.fallback).should.equal(true)
|
||||
})
|
||||
|
||||
it('should gather metrics', function (done) {
|
||||
request.get(`${Settings.apis.clsi.url}/metrics`, (err, res, body) => {
|
||||
if (err) return done(err)
|
||||
body
|
||||
.split('\n')
|
||||
.some(line => {
|
||||
return (
|
||||
line.startsWith('url_source') &&
|
||||
line.includes('path="project-blobs"')
|
||||
)
|
||||
})
|
||||
.should.equal(true)
|
||||
done()
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
|
@ -378,12 +378,13 @@ describe('ResourceWriter', function () {
|
|||
this.fs.mkdir = sinon.stub().callsArg(2)
|
||||
this.resource = {
|
||||
path: 'main.tex',
|
||||
url: 'http://www.example.com/main.tex',
|
||||
url: 'http://www.example.com/primary/main.tex',
|
||||
fallbackURL: 'http://fallback.example.com/fallback/main.tex',
|
||||
modified: Date.now(),
|
||||
}
|
||||
this.UrlCache.downloadUrlToFile = sinon
|
||||
.stub()
|
||||
.callsArgWith(4, 'fake error downloading file')
|
||||
.callsArgWith(5, 'fake error downloading file')
|
||||
return this.ResourceWriter._writeResourceToDisk(
|
||||
this.project_id,
|
||||
this.resource,
|
||||
|
@ -405,6 +406,7 @@ describe('ResourceWriter', function () {
|
|||
.calledWith(
|
||||
this.project_id,
|
||||
this.resource.url,
|
||||
this.resource.fallbackURL,
|
||||
path.join(this.basePath, this.resource.path),
|
||||
this.resource.modified
|
||||
)
|
||||
|
|
|
@ -23,6 +23,7 @@ describe('UrlCache', function () {
|
|||
this.callback = sinon.stub()
|
||||
this.url =
|
||||
'http://filestore/project/60b0dd39c418bc00598a0d22/file/60ae721ffb1d920027d3201f'
|
||||
this.fallbackURL = 'http://filestore/bucket/project-blobs/key/ab/cd/ef'
|
||||
this.project_id = '60b0dd39c418bc00598a0d22'
|
||||
return (this.UrlCache = SandboxedModule.require(modulePath, {
|
||||
requires: {
|
||||
|
@ -54,6 +55,29 @@ describe('UrlCache', function () {
|
|||
this.UrlCache.downloadUrlToFile(
|
||||
this.project_id,
|
||||
this.url,
|
||||
this.fallbackURL,
|
||||
this.destPath,
|
||||
this.lastModified,
|
||||
error => {
|
||||
expect(error).to.not.exist
|
||||
expect(
|
||||
this.UrlFetcher.promises.pipeUrlToFileWithRetry.called
|
||||
).to.equal(false)
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should not download on the semi-happy path', function (done) {
|
||||
const codedError = new Error()
|
||||
codedError.code = 'ENOENT'
|
||||
this.fs.promises.copyFile.onCall(0).rejects(codedError)
|
||||
this.fs.promises.copyFile.onCall(1).resolves()
|
||||
|
||||
this.UrlCache.downloadUrlToFile(
|
||||
this.project_id,
|
||||
this.url,
|
||||
this.fallbackURL,
|
||||
this.destPath,
|
||||
this.lastModified,
|
||||
error => {
|
||||
|
@ -70,11 +94,13 @@ describe('UrlCache', function () {
|
|||
const codedError = new Error()
|
||||
codedError.code = 'ENOENT'
|
||||
this.fs.promises.copyFile.onCall(0).rejects(codedError)
|
||||
this.fs.promises.copyFile.onCall(1).resolves()
|
||||
this.fs.promises.copyFile.onCall(1).rejects(codedError)
|
||||
this.fs.promises.copyFile.onCall(2).resolves()
|
||||
|
||||
this.UrlCache.downloadUrlToFile(
|
||||
this.project_id,
|
||||
this.url,
|
||||
this.fallbackURL,
|
||||
this.destPath,
|
||||
this.lastModified,
|
||||
error => {
|
||||
|
@ -94,6 +120,7 @@ describe('UrlCache', function () {
|
|||
this.UrlCache.downloadUrlToFile(
|
||||
this.project_id,
|
||||
this.url,
|
||||
this.fallbackURL,
|
||||
this.destPath,
|
||||
this.lastModified,
|
||||
error => {
|
||||
|
|
|
@ -73,6 +73,10 @@ const settings = {
|
|||
stores: {
|
||||
user_files: process.env.USER_FILES_BUCKET_NAME,
|
||||
template_files: process.env.TEMPLATE_FILES_BUCKET_NAME,
|
||||
|
||||
// allow signed links to be generated for these buckets
|
||||
project_blobs: process.env.OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET,
|
||||
global_blobs: process.env.OVERLEAF_EDITOR_BLOBS_BUCKET,
|
||||
},
|
||||
|
||||
fallback: process.env.FALLBACK_BACKEND
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// Keep in sync with services/web/app/src/Features/History/project_key.js
|
||||
const _ = require('lodash')
|
||||
const path = require('node:path')
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ import metrics from '@overleaf/metrics'
|
|||
import Settings from '@overleaf/settings'
|
||||
import logger from '@overleaf/logger'
|
||||
import PlansLocator from './app/src/Features/Subscription/PlansLocator.js'
|
||||
import HistoryManager from './app/src/Features/History/HistoryManager.js'
|
||||
import SiteAdminHandler from './app/src/infrastructure/SiteAdminHandler.js'
|
||||
import http from 'node:http'
|
||||
import https from 'node:https'
|
||||
|
@ -58,7 +59,11 @@ if (process.argv[1] === fileURLToPath(import.meta.url)) {
|
|||
|
||||
PlansLocator.ensurePlansAreSetupCorrectly()
|
||||
|
||||
Promise.all([mongodb.connectionPromise, mongoose.connectionPromise])
|
||||
Promise.all([
|
||||
mongodb.connectionPromise,
|
||||
mongoose.connectionPromise,
|
||||
HistoryManager.promises.loadGlobalBlobs(),
|
||||
])
|
||||
.then(async () => {
|
||||
Server.server.listen(port, host, function () {
|
||||
logger.debug(`web starting up, listening on ${host}:${port}`)
|
||||
|
@ -76,7 +81,11 @@ if (process.argv[1] === fileURLToPath(import.meta.url)) {
|
|||
}
|
||||
|
||||
// initialise site admin tasks
|
||||
Promise.all([mongodb.connectionPromise, mongoose.connectionPromise])
|
||||
Promise.all([
|
||||
mongodb.connectionPromise,
|
||||
mongoose.connectionPromise,
|
||||
HistoryManager.promises.loadGlobalBlobs(),
|
||||
])
|
||||
.then(() => SiteAdminHandler.initialise())
|
||||
.catch(err => {
|
||||
logger.fatal({ err }, 'Cannot connect to mongo. Exiting.')
|
||||
|
|
|
@ -24,6 +24,7 @@ const ClsiFormatChecker = require('./ClsiFormatChecker')
|
|||
const DocumentUpdaterHandler = require('../DocumentUpdater/DocumentUpdaterHandler')
|
||||
const Metrics = require('@overleaf/metrics')
|
||||
const Errors = require('../Errors/Errors')
|
||||
const { getBlobLocation } = require('../History/HistoryManager')
|
||||
|
||||
const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex']
|
||||
const OUTPUT_FILE_TIMEOUT_MS = 60000
|
||||
|
@ -532,6 +533,7 @@ async function _buildRequest(projectId, options) {
|
|||
rootDoc_id: 1,
|
||||
imageName: 1,
|
||||
rootFolder: 1,
|
||||
'overleaf.history.id': 1,
|
||||
})
|
||||
if (project == null) {
|
||||
throw new Errors.NotFoundError(`project does not exist: ${projectId}`)
|
||||
|
@ -731,12 +733,18 @@ function _finaliseRequest(projectId, options, project, docs, files) {
|
|||
}
|
||||
}
|
||||
|
||||
const historyId = project.overleaf.history.id
|
||||
if (!historyId) {
|
||||
throw new OError('project does not have a history id', { projectId })
|
||||
}
|
||||
for (let path in files) {
|
||||
const file = files[path]
|
||||
path = path.replace(/^\//, '') // Remove leading /
|
||||
const { bucket, key } = getBlobLocation(historyId, file.hash)
|
||||
resources.push({
|
||||
path,
|
||||
url: `${Settings.apis.filestore.url}/project/${project._id}/file/${file._id}`,
|
||||
url: `${Settings.apis.filestore.url}/bucket/${bucket}/key/${key}`,
|
||||
fallbackURL: `${Settings.apis.filestore.url}/project/${project._id}/file/${file._id}`,
|
||||
modified: file.created?.getTime(),
|
||||
})
|
||||
}
|
||||
|
|
|
@ -11,10 +11,46 @@ const OError = require('@overleaf/o-error')
|
|||
const UserGetter = require('../User/UserGetter')
|
||||
const ProjectGetter = require('../Project/ProjectGetter')
|
||||
const HistoryBackupDeletionHandler = require('./HistoryBackupDeletionHandler')
|
||||
const { ObjectId } = require('../../infrastructure/mongodb')
|
||||
const { db, ObjectId } = require('../../infrastructure/mongodb')
|
||||
const Metrics = require('@overleaf/metrics')
|
||||
const logger = require('@overleaf/logger')
|
||||
const { NotFoundError } = require('../Errors/Errors')
|
||||
const projectKey = require('./project_key')
|
||||
|
||||
// BEGIN copy from services/history-v1/storage/lib/blob_store/index.js
|
||||
|
||||
const GLOBAL_BLOBS = new Set() // CHANGE FROM SOURCE: only store hashes.
|
||||
|
||||
/**
 * Build the object key for a global blob by splitting the hash into
 * `<chars 0-1>/<chars 2-3>/<remaining chars>`.
 *
 * @param {string} hash - blob hash; only `.slice()` is called on it, so a
 *   missing hash throws a TypeError here
 * @return {string}
 */
function makeGlobalKey(hash) {
|
||||
return `${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4)}`
|
||||
}
|
||||
|
||||
/**
 * Build the object key for a project-scoped blob:
 * `<projectKey.format(projectId)>/<chars 0-1 of hash>/<remaining chars>`.
 * Unlike makeGlobalKey, the hash is split only once, after its second character.
 *
 * @param {string|number} projectId - passed straight to `projectKey.format`
 * @param {string} hash - blob hash; only `.slice()` is called on it
 * @return {string}
 */
function makeProjectKey(projectId, hash) {
|
||||
return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}`
|
||||
}
|
||||
|
||||
/**
 * Resolve the bucket and object key under which a blob is stored.
 *
 * Hashes present in GLOBAL_BLOBS resolve to the global-blobs bucket with a
 * hash-only key; every other hash resolves to the project-blobs bucket with
 * a key derived from `projectId` and the hash.
 *
 * GLOBAL_BLOBS starts out empty and is only filled by loadGlobalBlobs(), so
 * until that has completed every hash is treated as a project blob.
 *
 * @param {string|number} projectId - only used for non-global blobs
 * @param {string} hash
 * @return {{bucket: string, key: string}} bucket names come from
 *   `settings.apis.v1_history.buckets`
 */
function getBlobLocation(projectId, hash) {
|
||||
if (GLOBAL_BLOBS.has(hash)) {
|
||||
return {
|
||||
bucket: settings.apis.v1_history.buckets.globalBlobs,
|
||||
key: makeGlobalKey(hash),
|
||||
}
|
||||
} else {
|
||||
return {
|
||||
bucket: settings.apis.v1_history.buckets.projectBlobs,
|
||||
key: makeProjectKey(projectId, hash),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Populate GLOBAL_BLOBS with the hash (`_id`) of every document in the
 * `projectHistoryGlobalBlobs` collection.
 *
 * getBlobLocation() consults GLOBAL_BLOBS, so this should have resolved
 * before blob locations are computed.
 *
 * @return {Promise<void>} rejects if iterating the cursor fails
 */
async function loadGlobalBlobs() {
  // CHANGE FROM SOURCE: only store hashes, so only `_id` is fetched instead
  // of the full blob documents.
  const blobs = db.projectHistoryGlobalBlobs.find(
    {},
    { projection: { _id: 1 } }
  )
  for await (const blob of blobs) {
    GLOBAL_BLOBS.add(blob._id)
  }
}
|
||||
|
||||
// END copy from services/history-v1/storage/lib/blob_store/index.js
|
||||
|
||||
async function initializeProject(projectId) {
|
||||
const body = await fetchJson(`${settings.apis.project_history.url}/project`, {
|
||||
|
@ -357,6 +393,7 @@ function _userView(user) {
|
|||
}
|
||||
|
||||
module.exports = {
|
||||
getBlobLocation,
|
||||
initializeProject: callbackify(initializeProject),
|
||||
flushProject: callbackify(flushProject),
|
||||
resyncProject: callbackify(resyncProject),
|
||||
|
@ -368,6 +405,7 @@ module.exports = {
|
|||
copyBlob: callbackify(copyBlob),
|
||||
requestBlobWithFallback: callbackify(requestBlobWithFallback),
|
||||
promises: {
|
||||
loadGlobalBlobs,
|
||||
initializeProject,
|
||||
flushProject,
|
||||
resyncProject,
|
||||
|
|
24
services/web/app/src/Features/History/project_key.js
Normal file
24
services/web/app/src/Features/History/project_key.js
Normal file
|
@ -0,0 +1,24 @@
|
|||
// Keep in sync with services/history-v1/storage/lib/project_key.js
|
||||
const _ = require('lodash')
|
||||
const path = require('node:path')
|
||||
|
||||
//
|
||||
// The advice in http://docs.aws.amazon.com/AmazonS3/latest/dev/
|
||||
// request-rate-perf-considerations.html is to avoid sequential key prefixes,
|
||||
// so we reverse the project ID part of the key as they suggest.
|
||||
//
|
||||
/**
 * Build the key prefix for a project: the id is zero-padded to at least 9
 * characters, reversed, then split into `<3 chars>/<3 chars>/<rest>`.
 *
 * NOTE(review): `path.join` uses the platform path separator, so the result
 * is only `/`-separated on POSIX hosts. `path.posix.join` would make that
 * explicit — confirm against the upstream copy before changing.
 *
 * @param {string|number} projectId
 * @return {string}
 */
function format(projectId) {
|
||||
const prefix = naiveReverse(pad(projectId))
|
||||
return path.join(prefix.slice(0, 3), prefix.slice(3, 6), prefix.slice(6))
|
||||
}
|
||||
|
||||
/**
 * Left-pad a project id with '0' up to 9 characters using lodash `padStart`.
 * Inputs that are already 9 characters or longer are not truncated.
 *
 * @param {string|number} number - the project id (despite the name, a string
 *   is accepted; lodash stringifies the value)
 * @return {string}
 */
function pad(number) {
|
||||
return _.padStart(number, 9, '0')
|
||||
}
|
||||
|
||||
/**
 * Reverse a string by UTF-16 code unit. "Naive" because `split('')` breaks
 * surrogate pairs apart; that is harmless for the padded ids passed by format().
 *
 * @param {string} string
 * @return {string}
 */
function naiveReverse(string) {
|
||||
return string.split('').reverse().join('')
|
||||
}
|
||||
|
||||
exports.format = format
|
||||
exports.pad = pad
|
|
@ -68,6 +68,7 @@ const db = {
|
|||
projectAuditLogEntries: internalDb.collection('projectAuditLogEntries'),
|
||||
projectHistoryChunks: internalDb.collection('projectHistoryChunks'),
|
||||
projectHistoryFailures: internalDb.collection('projectHistoryFailures'),
|
||||
projectHistoryGlobalBlobs: internalDb.collection('projectHistoryGlobalBlobs'),
|
||||
projectHistoryLabels: internalDb.collection('projectHistoryLabels'),
|
||||
projectHistoryMetaData: internalDb.collection('projectHistoryMetaData'),
|
||||
projectHistorySyncState: internalDb.collection('projectHistorySyncState'),
|
||||
|
|
|
@ -275,6 +275,23 @@ module.exports = {
|
|||
process.env.HAVE_I_BEEN_PWNED_URL || 'https://api.pwnedpasswords.com',
|
||||
timeout: parseInt(process.env.HAVE_I_BEEN_PWNED_TIMEOUT, 10) || 5 * 1000,
|
||||
},
|
||||
v1_history: {
|
||||
url:
|
||||
process.env.V1_HISTORY_URL ||
|
||||
`http://${process.env.V1_HISTORY_HOST || '127.0.0.1'}:${
|
||||
process.env.V1_HISTORY_PORT || '3100'
|
||||
}/api`,
|
||||
user: process.env.V1_HISTORY_USER || 'staging',
|
||||
pass:
|
||||
process.env.V1_HISTORY_PASS ||
|
||||
process.env.V1_HISTORY_PASSWORD ||
|
||||
'password',
|
||||
|
||||
buckets: {
|
||||
globalBlobs: process.env.OVERLEAF_EDITOR_BLOBS_BUCKET,
|
||||
projectBlobs: process.env.OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET,
|
||||
},
|
||||
},
|
||||
|
||||
// For legacy reasons, we need to populate the below objects.
|
||||
v1: {},
|
||||
|
|
|
@ -9,6 +9,8 @@ const FILESTORE_URL = 'http://filestore.example.com'
|
|||
const CLSI_HOST = 'clsi.example.com'
|
||||
const MODULE_PATH = '../../../../app/src/Features/Compile/ClsiManager.js'
|
||||
|
||||
const GLOBAL_BLOB_HASH = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
|
||||
|
||||
describe('ClsiManager', function () {
|
||||
beforeEach(function () {
|
||||
this.user_id = 'user-id'
|
||||
|
@ -17,6 +19,7 @@ describe('ClsiManager', function () {
|
|||
compiler: 'latex',
|
||||
rootDoc_id: 'mock-doc-id-1',
|
||||
imageName: 'mock-image-name',
|
||||
overleaf: { history: { id: 42 } },
|
||||
}
|
||||
this.docs = {
|
||||
'/main.tex': {
|
||||
|
@ -31,10 +34,17 @@ describe('ClsiManager', function () {
|
|||
},
|
||||
}
|
||||
this.files = {
|
||||
'/images/image.png': {
|
||||
name: 'image.png',
|
||||
'/images/frog.png': {
|
||||
name: 'frog.png',
|
||||
_id: 'mock-file-id-1',
|
||||
created: new Date(),
|
||||
hash: GLOBAL_BLOB_HASH,
|
||||
},
|
||||
'/images/image.png': {
|
||||
name: 'image.png',
|
||||
_id: 'mock-file-id-2',
|
||||
created: new Date(),
|
||||
hash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
|
||||
},
|
||||
}
|
||||
this.clsiCookieKey = 'clsiserver'
|
||||
|
@ -129,6 +139,17 @@ describe('ClsiManager', function () {
|
|||
enablePdfCaching: true,
|
||||
clsiCookie: { key: 'clsiserver' },
|
||||
}
|
||||
this.HistoryManager = {
|
||||
getBlobLocation: sinon.stub().callsFake((historyId, hash) => {
|
||||
if (hash === GLOBAL_BLOB_HASH) {
|
||||
return {
|
||||
bucket: 'global-blobs',
|
||||
key: 'aa/aa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
|
||||
}
|
||||
}
|
||||
return { bucket: 'project-blobs', key: `${historyId}/${hash}` }
|
||||
}),
|
||||
}
|
||||
|
||||
this.ClsiManager = SandboxedModule.require(MODULE_PATH, {
|
||||
requires: {
|
||||
|
@ -145,6 +166,7 @@ describe('ClsiManager', function () {
|
|||
'@overleaf/fetch-utils': this.FetchUtils,
|
||||
'./ClsiFormatChecker': this.ClsiFormatChecker,
|
||||
'@overleaf/metrics': this.Metrics,
|
||||
'../History/HistoryManager': this.HistoryManager,
|
||||
},
|
||||
})
|
||||
tk.freeze(Date.now())
|
||||
|
@ -238,6 +260,7 @@ describe('ClsiManager', function () {
|
|||
rootDoc_id: 1,
|
||||
imageName: 1,
|
||||
rootFolder: 1,
|
||||
'overleaf.history.id': 1,
|
||||
}
|
||||
)
|
||||
})
|
||||
|
@ -372,6 +395,7 @@ describe('ClsiManager', function () {
|
|||
rootDoc_id: 1,
|
||||
imageName: 1,
|
||||
rootFolder: 1,
|
||||
'overleaf.history.id': 1,
|
||||
}
|
||||
)
|
||||
})
|
||||
|
@ -1003,9 +1027,16 @@ function _makeResources(project, docs, files) {
|
|||
})
|
||||
}
|
||||
for (const [path, file] of Object.entries(files)) {
|
||||
let url
|
||||
if (file.hash === GLOBAL_BLOB_HASH) {
|
||||
url = `${FILESTORE_URL}/bucket/global-blobs/key/aa/aa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`
|
||||
} else {
|
||||
url = `${FILESTORE_URL}/bucket/project-blobs/key/${project.overleaf.history.id}/${file.hash}`
|
||||
}
|
||||
resources.push({
|
||||
path: path.replace(/^\//, ''),
|
||||
url: `${FILESTORE_URL}/project/${project._id}/file/${file._id}`,
|
||||
url,
|
||||
fallbackURL: `${FILESTORE_URL}/project/${project._id}/file/${file._id}`,
|
||||
modified: file.created.getTime(),
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue