2020-02-19 06:14:28 -05:00
|
|
|
/* eslint-disable
|
|
|
|
handle-callback-err,
|
|
|
|
no-return-assign,
|
|
|
|
no-unused-vars,
|
|
|
|
node/no-deprecated-api,
|
|
|
|
*/
|
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Fix any style issues and re-enable lint.
|
2020-02-19 06:14:14 -05:00
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
2020-02-19 06:14:37 -05:00
|
|
|
let UrlFetcher
|
|
|
|
const request = require('request').defaults({ jar: false })
|
|
|
|
const fs = require('fs')
|
|
|
|
const logger = require('logger-sharelatex')
|
|
|
|
const settings = require('settings-sharelatex')
|
|
|
|
const URL = require('url')
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// One minute in milliseconds; the download timeouts in this file are
// expressed as multiples of this value.
const oneMinute = 60 * 1000
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
module.exports = UrlFetcher = {
|
|
|
|
pipeUrlToFile(url, filePath, _callback) {
|
|
|
|
if (_callback == null) {
|
|
|
|
_callback = function(error) {}
|
|
|
|
}
|
|
|
|
const callbackOnce = function(error) {
|
|
|
|
if (timeoutHandler != null) {
|
|
|
|
clearTimeout(timeoutHandler)
|
|
|
|
}
|
|
|
|
_callback(error)
|
|
|
|
return (_callback = function() {})
|
|
|
|
}
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
if (settings.filestoreDomainOveride != null) {
|
|
|
|
const p = URL.parse(url).path
|
|
|
|
url = `${settings.filestoreDomainOveride}${p}`
|
|
|
|
}
|
|
|
|
var timeoutHandler = setTimeout(
|
|
|
|
function() {
|
|
|
|
timeoutHandler = null
|
|
|
|
logger.error({ url, filePath }, 'Timed out downloading file to cache')
|
|
|
|
return callbackOnce(
|
|
|
|
new Error(`Timed out downloading file to cache ${url}`)
|
|
|
|
)
|
|
|
|
},
|
|
|
|
// FIXME: maybe need to close fileStream here
|
|
|
|
3 * oneMinute
|
|
|
|
)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
logger.log({ url, filePath }, 'started downloading url to cache')
|
|
|
|
const urlStream = request.get({ url, timeout: oneMinute })
|
|
|
|
urlStream.pause() // stop data flowing until we are ready
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// attach handlers before setting up pipes
|
|
|
|
urlStream.on('error', function(error) {
|
|
|
|
logger.error({ err: error, url, filePath }, 'error downloading url')
|
|
|
|
return callbackOnce(
|
|
|
|
error || new Error(`Something went wrong downloading the URL ${url}`)
|
|
|
|
)
|
|
|
|
})
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
urlStream.on('end', () =>
|
|
|
|
logger.log({ url, filePath }, 'finished downloading file into cache')
|
|
|
|
)
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
return urlStream.on('response', function(res) {
|
|
|
|
if (res.statusCode >= 200 && res.statusCode < 300) {
|
|
|
|
const fileStream = fs.createWriteStream(filePath)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
// attach handlers before setting up pipes
|
|
|
|
fileStream.on('error', function(error) {
|
|
|
|
logger.error(
|
|
|
|
{ err: error, url, filePath },
|
|
|
|
'error writing file into cache'
|
|
|
|
)
|
|
|
|
return fs.unlink(filePath, function(err) {
|
|
|
|
if (err != null) {
|
|
|
|
logger.err({ err, filePath }, 'error deleting file from cache')
|
|
|
|
}
|
|
|
|
return callbackOnce(error)
|
|
|
|
})
|
|
|
|
})
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
fileStream.on('finish', function() {
|
|
|
|
logger.log({ url, filePath }, 'finished writing file into cache')
|
|
|
|
return callbackOnce()
|
|
|
|
})
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
fileStream.on('pipe', () =>
|
|
|
|
logger.log({ url, filePath }, 'piping into filestream')
|
|
|
|
)
|
2015-05-15 09:07:15 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
urlStream.pipe(fileStream)
|
|
|
|
return urlStream.resume() // now we are ready to handle the data
|
|
|
|
} else {
|
|
|
|
logger.error(
|
|
|
|
{ statusCode: res.statusCode, url, filePath },
|
|
|
|
'unexpected status code downloading url to cache'
|
|
|
|
)
|
|
|
|
// https://nodejs.org/api/http.html#http_class_http_clientrequest
|
|
|
|
// If you add a 'response' event handler, then you must consume
|
|
|
|
// the data from the response object, either by calling
|
|
|
|
// response.read() whenever there is a 'readable' event, or by
|
|
|
|
// adding a 'data' handler, or by calling the .resume()
|
|
|
|
// method. Until the data is consumed, the 'end' event will not
|
|
|
|
// fire. Also, until the data is read it will consume memory
|
|
|
|
// that can eventually lead to a 'process out of memory' error.
|
|
|
|
urlStream.resume() // discard the data
|
|
|
|
return callbackOnce(
|
|
|
|
new Error(
|
|
|
|
`URL returned non-success status code: ${res.statusCode} ${url}`
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|