2014-02-12 12:27:43 -05:00
|
|
|
request = require("request").defaults(jar: false)
|
|
|
|
fs = require("fs")
|
2015-04-29 10:55:58 -04:00
|
|
|
logger = require "logger-sharelatex"
|
2018-07-03 11:41:34 -04:00
|
|
|
settings = require("settings-sharelatex")
|
|
|
|
URL = require('url');
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2015-05-15 09:07:15 -04:00
|
|
|
# Base time unit for download/timeout durations, in milliseconds.
oneMinute = 60 * 1000
|
|
|
|
|
2014-02-12 12:27:43 -05:00
|
|
|
module.exports = UrlFetcher =
	# Download `url` into the local file at `filePath`.
	# `_callback(error)` is invoked exactly once: with no error on success, or
	# with an Error on timeout, download failure, write failure, or a non-2xx
	# response status.
	pipeUrlToFile: (url, filePath, _callback = (error) ->) ->
		callbackOnce = (error) ->
			clearTimeout timeoutHandler if timeoutHandler?
			_callback(error)
			_callback = () -> # guarantee the caller is called back at most once

		if settings.filestoreDomainOveride?
			# rewrite the url to point at the overridden filestore host,
			# keeping only the path of the original url
			p = URL.parse(url).path
			url = "#{settings.filestoreDomainOveride}#{p}"

		fileStream = null # hoisted so the timeout handler can close it

		timeoutHandler = setTimeout () ->
			timeoutHandler = null
			logger.error url:url, filePath: filePath, "Timed out downloading file to cache"
			callbackOnce(new Error("Timed out downloading file to cache #{url}"))
			# Clean up (previously a FIXME): abort the request so the socket is
			# released, and close the write stream if we had started writing.
			# Without this, a timed-out download kept running in the background
			# and leaked both the socket and the open file handle.
			urlStream.abort() if urlStream?
			fileStream.close() if fileStream?
		, 3 * oneMinute

		logger.log url:url, filePath: filePath, "started downloading url to cache"
		urlStream = request.get({url: url, timeout: oneMinute})
		urlStream.pause() # stop data flowing until we are ready

		# attach handlers before setting up pipes
		urlStream.on "error", (error) ->
			logger.error err: error, url:url, filePath: filePath, "error downloading url"
			callbackOnce(error or new Error("Something went wrong downloading the URL #{url}"))

		urlStream.on "end", () ->
			logger.log url:url, filePath: filePath, "finished downloading file into cache"

		urlStream.on "response", (res) ->
			if res.statusCode >= 200 and res.statusCode < 300
				fileStream = fs.createWriteStream(filePath)

				# attach handlers before setting up pipes
				fileStream.on 'error', (error) ->
					logger.error err: error, url:url, filePath: filePath, "error writing file into cache"
					# best-effort removal of the partially written file
					fs.unlink filePath, (err) ->
						if err?
							logger.err err: err, filePath: filePath, "error deleting file from cache"
					callbackOnce(error)

				fileStream.on 'finish', () ->
					logger.log url:url, filePath: filePath, "finished writing file into cache"
					callbackOnce()

				fileStream.on 'pipe', () ->
					logger.log url:url, filePath: filePath, "piping into filestream"

				urlStream.pipe(fileStream)
				urlStream.resume() # now we are ready to handle the data
			else
				logger.error statusCode: res.statusCode, url:url, filePath: filePath, "unexpected status code downloading url to cache"
				# https://nodejs.org/api/http.html#http_class_http_clientrequest
				# If you add a 'response' event handler, then you must consume
				# the data from the response object, either by calling
				# response.read() whenever there is a 'readable' event, or by
				# adding a 'data' handler, or by calling the .resume()
				# method. Until the data is consumed, the 'end' event will not
				# fire. Also, until the data is read it will consume memory
				# that can eventually lead to a 'process out of memory' error.
				urlStream.resume() # discard the data
				callbackOnce(new Error("URL returned non-success status code: #{res.statusCode} #{url}"))