// overleaf/services/clsi/app/js/UrlFetcher.js
/* eslint-disable
no-return-assign,
no-unused-vars,
node/no-deprecated-api,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
2020-06-02 04:51:34 -04:00
let UrlFetcher
const request = require('request').defaults({ jar: false })
const fs = require('fs')
const logger = require('logger-sharelatex')
const settings = require('@overleaf/settings')
const async = require('async')
const { URL } = require('url')
const { promisify } = require('util')
2014-02-12 12:27:43 -05:00
const oneMinute = 60 * 1000
2015-05-15 09:07:15 -04:00
module.exports = UrlFetcher = {
pipeUrlToFileWithRetry(url, filePath, callback) {
2020-08-10 12:01:11 -04:00
const doDownload = function (cb) {
UrlFetcher.pipeUrlToFile(url, filePath, cb)
}
async.retry(3, doDownload, callback)
},
pipeUrlToFile(url, filePath, _callback) {
if (_callback == null) {
_callback = function () {}
}
2020-08-10 12:01:11 -04:00
const callbackOnce = function (error) {
if (timeoutHandler != null) {
clearTimeout(timeoutHandler)
}
_callback(error)
2020-08-10 12:01:11 -04:00
return (_callback = function () {})
}
2015-05-15 09:07:15 -04:00
const u = new URL(url)
if (
settings.filestoreDomainOveride &&
u.host !== settings.apis.clsiPerf.host
) {
url = `${settings.filestoreDomainOveride}${u.pathname}${u.search}`
}
let timeoutHandler = setTimeout(
2020-08-10 12:01:11 -04:00
function () {
timeoutHandler = null
logger.error({ url, filePath }, 'Timed out downloading file to cache')
return callbackOnce(
new Error(`Timed out downloading file to cache ${url}`)
)
},
// FIXME: maybe need to close fileStream here
3 * oneMinute
)
2015-05-15 09:07:15 -04:00
logger.log({ url, filePath }, 'started downloading url to cache')
const urlStream = request.get({ url, timeout: oneMinute })
urlStream.pause() // stop data flowing until we are ready
2015-05-15 09:07:15 -04:00
// attach handlers before setting up pipes
2020-08-10 12:01:11 -04:00
urlStream.on('error', function (error) {
logger.error({ err: error, url, filePath }, 'error downloading url')
return callbackOnce(
error || new Error(`Something went wrong downloading the URL ${url}`)
)
})
2014-02-12 12:27:43 -05:00
urlStream.on('end', () =>
logger.log({ url, filePath }, 'finished downloading file into cache')
)
2014-02-12 12:27:43 -05:00
2020-08-10 12:01:11 -04:00
return urlStream.on('response', function (res) {
if (res.statusCode >= 200 && res.statusCode < 300) {
const atomicWrite = filePath + '~'
const fileStream = fs.createWriteStream(atomicWrite)
2015-05-15 09:07:15 -04:00
// attach handlers before setting up pipes
2020-08-10 12:01:11 -04:00
fileStream.on('error', function (error) {
logger.error(
{ err: error, url, filePath },
'error writing file into cache'
)
return fs.unlink(atomicWrite, function (err) {
if (err != null) {
logger.err({ err, filePath }, 'error deleting file from cache')
}
return callbackOnce(error)
})
})
2015-05-15 09:07:15 -04:00
2020-08-10 12:01:11 -04:00
fileStream.on('finish', function () {
logger.log({ url, filePath }, 'finished writing file into cache')
fs.rename(atomicWrite, filePath, error => {
if (error) {
fs.unlink(atomicWrite, () => callbackOnce(error))
} else {
callbackOnce()
}
})
})
2015-05-15 09:07:15 -04:00
fileStream.on('pipe', () =>
logger.log({ url, filePath }, 'piping into filestream')
)
2015-05-15 09:07:15 -04:00
urlStream.pipe(fileStream)
return urlStream.resume() // now we are ready to handle the data
} else {
logger.error(
{ statusCode: res.statusCode, url, filePath },
'unexpected status code downloading url to cache'
)
// https://nodejs.org/api/http.html#http_class_http_clientrequest
// If you add a 'response' event handler, then you must consume
// the data from the response object, either by calling
// response.read() whenever there is a 'readable' event, or by
// adding a 'data' handler, or by calling the .resume()
// method. Until the data is consumed, the 'end' event will not
// fire. Also, until the data is read it will consume memory
// that can eventually lead to a 'process out of memory' error.
urlStream.resume() // discard the data
return callbackOnce(
new Error(
`URL returned non-success status code: ${res.statusCode} ${url}`
)
)
}
})
2021-07-13 07:04:48 -04:00
},
}
// Promise-returning counterpart of the callback API, for async/await callers.
const pipeUrlToFileWithRetryAsync = promisify(
  UrlFetcher.pipeUrlToFileWithRetry
)
module.exports.promises = {
  pipeUrlToFileWithRetry: pipeUrlToFileWithRetryAsync,
}