/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let UrlFetcher;
const request = require("request").defaults({jar: false});
const fs = require("fs");
const logger = require("logger-sharelatex");
const settings = require("settings-sharelatex");
const URL = require('url');

const oneMinute = 60 * 1000;

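// UrlFetcher streams a remote URL into a local file on disk. The callback is
// guarded so it fires at most once, and the download is abandoned if it has
// not finished within three minutes.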
module.exports = (UrlFetcher = {
  pipeUrlToFile(url, filePath, _callback) {
    if (_callback == null) { _callback = function(error) {}; }
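    // Ensure the callback only ever runs once; the first invocation also
    // cancels the pending download timeout.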
    const callbackOnce = function(error) {
      if (timeoutHandler != null) { clearTimeout(timeoutHandler); }
      _callback(error);
      return _callback = function() {};
    };

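    // If a filestore domain override is configured, rewrite the URL to point
    // at it, keeping the original path.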
    if (settings.filestoreDomainOveride != null) {
      const p = URL.parse(url).path;
      url = `${settings.filestoreDomainOveride}${p}`;
    }

    var timeoutHandler = setTimeout(function() {
      timeoutHandler = null;
      logger.error({url, filePath}, "Timed out downloading file to cache");
      return callbackOnce(new Error(`Timed out downloading file to cache ${url}`));
    }
    // FIXME: maybe need to close fileStream here
    , 3 * oneMinute);

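    // Start the request paused so handlers can be attached first; data is only
    // consumed once the response status has been checked below.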
    logger.log({url, filePath}, "started downloading url to cache");
    const urlStream = request.get({url, timeout: oneMinute});
    urlStream.pause(); // stop data flowing until we are ready

    // attach handlers before setting up pipes
    urlStream.on("error", function(error) {
      logger.error({err: error, url, filePath}, "error downloading url");
      return callbackOnce(error || new Error(`Something went wrong downloading the URL ${url}`));
    });

    urlStream.on("end", () => logger.log({url, filePath}, "finished downloading file into cache"));

    return urlStream.on("response", function(res) {
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        const fileStream = fs.createWriteStream(filePath);

        // attach handlers before setting up pipes
        fileStream.on('error', function(error) {
          logger.error({err: error, url, filePath}, "error writing file into cache");
          return fs.unlink(filePath, function(err) {
            if (err != null) {
              logger.err({err, filePath}, "error deleting file from cache");
            }
            return callbackOnce(error);
          });
        });

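        // 'finish' fires once all data has been flushed to disk, so success is
        // only reported after the file is completely written.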
        fileStream.on('finish', function() {
          logger.log({url, filePath}, "finished writing file into cache");
          return callbackOnce();
        });

        fileStream.on('pipe', () => logger.log({url, filePath}, "piping into filestream"));

        urlStream.pipe(fileStream);
        return urlStream.resume(); // now we are ready to handle the data
      } else {
        logger.error({statusCode: res.statusCode, url, filePath}, "unexpected status code downloading url to cache");
        // https://nodejs.org/api/http.html#http_class_http_clientrequest
        // If you add a 'response' event handler, then you must consume
        // the data from the response object, either by calling
        // response.read() whenever there is a 'readable' event, or by
        // adding a 'data' handler, or by calling the .resume()
        // method. Until the data is consumed, the 'end' event will not
        // fire. Also, until the data is read it will consume memory
        // that can eventually lead to a 'process out of memory' error.
        urlStream.resume(); // discard the data
        return callbackOnce(new Error(`URL returned non-success status code: ${res.statusCode} ${url}`));
      }
    });
  }
});
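// A minimal usage sketch, assuming a hypothetical filestore URL and cache
// path (real callers and settings live elsewhere in the service):
//
//   UrlFetcher.pipeUrlToFile(
//     "http://filestore.example/project/abc/file/def",
//     "/cache/def",
//     function(error) {
//       if (error != null) { logger.error({err: error}, "download failed"); }
//     }
//   );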