2024-11-08 05:21:56 -05:00
|
|
|
const fs = require('node:fs')
|
2022-03-01 10:09:36 -05:00
|
|
|
const logger = require('@overleaf/logger')
|
2023-04-27 06:16:41 -04:00
|
|
|
const Settings = require('@overleaf/settings')
|
2024-05-15 09:46:18 -04:00
|
|
|
const {
|
|
|
|
CustomHttpAgent,
|
|
|
|
CustomHttpsAgent,
|
|
|
|
fetchStream,
|
|
|
|
} = require('@overleaf/fetch-utils')
|
2024-11-08 05:21:56 -05:00
|
|
|
const { URL } = require('node:url')
|
|
|
|
const { pipeline } = require('node:stream/promises')
|
2024-01-17 04:22:46 -05:00
|
|
|
const Metrics = require('./Metrics')
|
2023-04-27 06:16:41 -04:00
|
|
|
|
2024-05-15 09:46:18 -04:00
|
|
|
// Connect timeout handed to both agents below.
// NOTE(review): presumably milliseconds — confirm against the
// CustomHttpAgent/CustomHttpsAgent `connectTimeout` option in
// @overleaf/fetch-utils.
const MAX_CONNECT_TIME = 1000

// One shared agent per protocol; pipeUrlToFile picks between them per request.
const httpAgent = new CustomHttpAgent({ connectTimeout: MAX_CONNECT_TIME })
const httpsAgent = new CustomHttpsAgent({ connectTimeout: MAX_CONNECT_TIME })
|
|
|
|
|
2023-04-27 06:16:41 -04:00
|
|
|
/**
 * Download `url` to `filePath`, making up to three attempts in total.
 *
 * Every attempt is timed with a 'url_fetcher' Metrics.Timer: the `path` label
 * distinguishes the first attempt from later ones and the `status` label
 * records the outcome. Each failed attempt is logged as a warning together
 * with the number of attempts still left.
 *
 * @param {string} url - URL to download
 * @param {string} filePath - destination path, passed through to pipeUrlToFile
 * @returns {Promise<void>} resolves as soon as one attempt succeeds
 * @throws the error raised by the last attempt when all attempts fail
 */
async function pipeUrlToFileWithRetry(url, filePath) {
  let remainingAttempts = 3
  let lastErr
  // The post-decrement means `remainingAttempts` already excludes the attempt
  // in progress, so the value logged on failure is "attempts left after this".
  while (remainingAttempts-- > 0) {
    const timer = new Metrics.Timer('url_fetcher', {
      // NOTE(review): the retry label has a leading space, which looks like a
      // typo. It is kept byte-for-byte because changing it would alter the
      // emitted metric series — confirm before normalising.
      path: lastErr ? ' retry' : 'fetch',
    })
    try {
      await pipeUrlToFile(url, filePath)
      timer.done({ status: 'success' })
      return
    } catch (err) {
      timer.done({ status: 'error' })
      logger.warn(
        { err, url, filePath, remainingAttempts },
        'error downloading url'
      )
      lastErr = err
    }
  }
  throw lastErr
}
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2023-04-27 06:16:41 -04:00
|
|
|
/**
 * Stream the response body of `url` into the file at `filePath`.
 *
 * The body is written to a temporary sibling file (`filePath` + '~') and is
 * renamed onto `filePath` only after the whole stream has been written, so a
 * failed download does not leave a partial file at `filePath`.
 *
 * When `Settings.filestoreDomainOveride` is set and the URL's host differs
 * from `Settings.apis.clsiPerf.host`, the request goes to the override origin
 * instead, keeping the original path and query string.
 *
 * @param {string} url - URL to download
 * @param {string} filePath - final destination path
 * @returns {Promise<void>}
 * @throws if `url` cannot be parsed, or if fetching, writing or renaming fails
 */
async function pipeUrlToFile(url, filePath) {
  const parsedUrl = new URL(url)
  const useFilestoreOverride =
    Settings.filestoreDomainOveride &&
    parsedUrl.host !== Settings.apis.clsiPerf.host
  const targetUrl = useFilestoreOverride
    ? `${Settings.filestoreDomainOveride}${parsedUrl.pathname}${parsedUrl.search}`
    : url

  const stream = await fetchStream(targetUrl, {
    // abort the request once 60 seconds have elapsed
    signal: AbortSignal.timeout(60 * 1000),
    // provide a function to get the agent for each request
    // as there may be multiple requests with different protocols
    // due to redirects.
    agent: _url => (_url.protocol === 'https:' ? httpsAgent : httpAgent),
  })

  const atomicWrite = filePath + '~'
  try {
    const output = fs.createWriteStream(atomicWrite)
    await pipeline(stream, output)
    await fs.promises.rename(atomicWrite, filePath)
    Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten)
  } catch (err) {
    try {
      await fs.promises.unlink(atomicWrite)
    } catch (e) {
      // Best-effort cleanup: a failure to remove the temporary file is
      // deliberately ignored so that the original error is the one reported.
    }
    throw err
  }
}
|
2021-10-06 04:11:59 -04:00
|
|
|
|
|
|
|
// Promise-based API exposed by this module.
module.exports.promises = {
  pipeUrlToFileWithRetry,
}
|