2020-02-19 06:14:28 -05:00
|
|
|
/* eslint-disable
|
|
|
|
no-return-assign,
|
|
|
|
no-unused-vars,
|
|
|
|
node/no-deprecated-api,
|
|
|
|
*/
|
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Fix any style issues and re-enable lint.
|
2020-02-19 06:14:14 -05:00
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
2020-06-02 04:51:34 -04:00
|
|
|
let UrlFetcher
|
2020-02-19 06:14:37 -05:00
|
|
|
const request = require('request').defaults({ jar: false })
|
|
|
|
const fs = require('fs')
|
2022-03-01 10:09:36 -05:00
|
|
|
const logger = require('@overleaf/logger')
|
2021-07-12 12:47:21 -04:00
|
|
|
const settings = require('@overleaf/settings')
|
2020-05-14 08:09:57 -04:00
|
|
|
const async = require('async')
|
2021-10-20 06:17:59 -04:00
|
|
|
const { URL } = require('url')
|
2021-10-06 04:11:59 -04:00
|
|
|
const { promisify } = require('util')
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// One minute expressed in milliseconds; the request timeout and the overall
// download timer in this file are written as multiples of it.
const oneMinute = 60 * 1000
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
module.exports = UrlFetcher = {
|
2020-05-14 08:09:57 -04:00
|
|
|
pipeUrlToFileWithRetry(url, filePath, callback) {
|
2020-08-10 12:01:11 -04:00
|
|
|
const doDownload = function (cb) {
|
2020-05-14 08:09:57 -04:00
|
|
|
UrlFetcher.pipeUrlToFile(url, filePath, cb)
|
|
|
|
}
|
|
|
|
async.retry(3, doDownload, callback)
|
|
|
|
},
|
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
pipeUrlToFile(url, filePath, _callback) {
|
|
|
|
if (_callback == null) {
|
2021-10-27 05:49:18 -04:00
|
|
|
_callback = function () {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2020-08-10 12:01:11 -04:00
|
|
|
const callbackOnce = function (error) {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (timeoutHandler != null) {
|
|
|
|
clearTimeout(timeoutHandler)
|
|
|
|
}
|
|
|
|
_callback(error)
|
2020-08-10 12:01:11 -04:00
|
|
|
return (_callback = function () {})
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2021-10-20 06:17:59 -04:00
|
|
|
const u = new URL(url)
|
2021-07-02 04:17:29 -04:00
|
|
|
if (
|
|
|
|
settings.filestoreDomainOveride &&
|
|
|
|
u.host !== settings.apis.clsiPerf.host
|
|
|
|
) {
|
2021-10-20 06:17:59 -04:00
|
|
|
url = `${settings.filestoreDomainOveride}${u.pathname}${u.search}`
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-10-26 04:08:56 -04:00
|
|
|
let timeoutHandler = setTimeout(
|
2020-08-10 12:01:11 -04:00
|
|
|
function () {
|
2020-02-19 06:14:37 -05:00
|
|
|
timeoutHandler = null
|
|
|
|
logger.error({ url, filePath }, 'Timed out downloading file to cache')
|
|
|
|
return callbackOnce(
|
|
|
|
new Error(`Timed out downloading file to cache ${url}`)
|
|
|
|
)
|
|
|
|
},
|
|
|
|
// FIXME: maybe need to close fileStream here
|
|
|
|
3 * oneMinute
|
|
|
|
)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ url, filePath }, 'started downloading url to cache')
|
2020-02-19 06:14:37 -05:00
|
|
|
const urlStream = request.get({ url, timeout: oneMinute })
|
|
|
|
urlStream.pause() // stop data flowing until we are ready
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// attach handlers before setting up pipes
|
2020-08-10 12:01:11 -04:00
|
|
|
urlStream.on('error', function (error) {
|
2020-02-19 06:14:37 -05:00
|
|
|
logger.error({ err: error, url, filePath }, 'error downloading url')
|
|
|
|
return callbackOnce(
|
|
|
|
error || new Error(`Something went wrong downloading the URL ${url}`)
|
|
|
|
)
|
|
|
|
})
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
urlStream.on('end', () =>
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ url, filePath }, 'finished downloading file into cache')
|
2020-02-19 06:14:37 -05:00
|
|
|
)
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-08-10 12:01:11 -04:00
|
|
|
return urlStream.on('response', function (res) {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (res.statusCode >= 200 && res.statusCode < 300) {
|
2021-10-06 04:11:59 -04:00
|
|
|
const atomicWrite = filePath + '~'
|
|
|
|
const fileStream = fs.createWriteStream(atomicWrite)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// attach handlers before setting up pipes
|
2020-08-10 12:01:11 -04:00
|
|
|
fileStream.on('error', function (error) {
|
2020-02-19 06:14:37 -05:00
|
|
|
logger.error(
|
|
|
|
{ err: error, url, filePath },
|
|
|
|
'error writing file into cache'
|
|
|
|
)
|
2021-10-06 04:11:59 -04:00
|
|
|
return fs.unlink(atomicWrite, function (err) {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (err != null) {
|
|
|
|
logger.err({ err, filePath }, 'error deleting file from cache')
|
|
|
|
}
|
|
|
|
return callbackOnce(error)
|
|
|
|
})
|
|
|
|
})
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-08-10 12:01:11 -04:00
|
|
|
fileStream.on('finish', function () {
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ url, filePath }, 'finished writing file into cache')
|
2021-10-06 04:11:59 -04:00
|
|
|
fs.rename(atomicWrite, filePath, error => {
|
|
|
|
if (error) {
|
|
|
|
fs.unlink(atomicWrite, () => callbackOnce(error))
|
|
|
|
} else {
|
|
|
|
callbackOnce()
|
|
|
|
}
|
|
|
|
})
|
2020-02-19 06:14:37 -05:00
|
|
|
})
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
fileStream.on('pipe', () =>
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ url, filePath }, 'piping into filestream')
|
2020-02-19 06:14:37 -05:00
|
|
|
)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
urlStream.pipe(fileStream)
|
|
|
|
return urlStream.resume() // now we are ready to handle the data
|
|
|
|
} else {
|
|
|
|
logger.error(
|
|
|
|
{ statusCode: res.statusCode, url, filePath },
|
|
|
|
'unexpected status code downloading url to cache'
|
|
|
|
)
|
|
|
|
// https://nodejs.org/api/http.html#http_class_http_clientrequest
|
|
|
|
// If you add a 'response' event handler, then you must consume
|
|
|
|
// the data from the response object, either by calling
|
|
|
|
// response.read() whenever there is a 'readable' event, or by
|
|
|
|
// adding a 'data' handler, or by calling the .resume()
|
|
|
|
// method. Until the data is consumed, the 'end' event will not
|
|
|
|
// fire. Also, until the data is read it will consume memory
|
|
|
|
// that can eventually lead to a 'process out of memory' error.
|
|
|
|
urlStream.resume() // discard the data
|
|
|
|
return callbackOnce(
|
|
|
|
new Error(
|
|
|
|
`URL returned non-success status code: ${res.statusCode} ${url}`
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
})
|
2021-07-13 07:04:48 -04:00
|
|
|
},
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-10-06 04:11:59 -04:00
|
|
|
|
|
|
|
// Promise-returning counterpart of the callback API, built with util.promisify.
const pipeUrlToFileWithRetryPromise = promisify(
  UrlFetcher.pipeUrlToFileWithRetry
)

module.exports.promises = {
  pipeUrlToFileWithRetry: pipeUrlToFileWithRetryPromise,
}
|