overleaf/services/clsi/app/coffee/UrlFetcher.js

89 lines
3.4 KiB
JavaScript
Raw Normal View History

/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let UrlFetcher;
const request = require("request").defaults({jar: false});
const fs = require("fs");
const logger = require("logger-sharelatex");
const settings = require("settings-sharelatex");
const URL = require('url');
2014-02-12 12:27:43 -05:00
// Number of milliseconds in one minute; the download timeouts below are expressed in terms of it.
const oneMinute = 60 * 1000;
2015-05-15 09:07:15 -04:00
module.exports = (UrlFetcher = {
pipeUrlToFile(url, filePath, _callback) {
if (_callback == null) { _callback = function(error) {}; }
const callbackOnce = function(error) {
if (timeoutHandler != null) { clearTimeout(timeoutHandler); }
_callback(error);
return _callback = function() {};
};
2015-05-15 09:07:15 -04:00
if (settings.filestoreDomainOveride != null) {
const p = URL.parse(url).path;
url = `${settings.filestoreDomainOveride}${p}`;
}
var timeoutHandler = setTimeout(function() {
timeoutHandler = null;
logger.error({url, filePath}, "Timed out downloading file to cache");
return callbackOnce(new Error(`Timed out downloading file to cache ${url}`));
}
// FIXME: maybe need to close fileStream here
, 3 * oneMinute);
2015-05-15 09:07:15 -04:00
logger.log({url, filePath}, "started downloading url to cache");
const urlStream = request.get({url, timeout: oneMinute});
urlStream.pause(); // stop data flowing until we are ready
2015-05-15 09:07:15 -04:00
// attach handlers before setting up pipes
urlStream.on("error", function(error) {
logger.error({err: error, url, filePath}, "error downloading url");
return callbackOnce(error || new Error(`Something went wrong downloading the URL ${url}`));
});
2014-02-12 12:27:43 -05:00
urlStream.on("end", () => logger.log({url, filePath}, "finished downloading file into cache"));
2014-02-12 12:27:43 -05:00
return urlStream.on("response", function(res) {
if ((res.statusCode >= 200) && (res.statusCode < 300)) {
const fileStream = fs.createWriteStream(filePath);
2015-05-15 09:07:15 -04:00
// attach handlers before setting up pipes
fileStream.on('error', function(error) {
logger.error({err: error, url, filePath}, "error writing file into cache");
return fs.unlink(filePath, function(err) {
if (err != null) {
logger.err({err, filePath}, "error deleting file from cache");
}
return callbackOnce(error);
});
});
2015-05-15 09:07:15 -04:00
fileStream.on('finish', function() {
logger.log({url, filePath}, "finished writing file into cache");
return callbackOnce();
});
2015-05-15 09:07:15 -04:00
fileStream.on('pipe', () => logger.log({url, filePath}, "piping into filestream"));
2015-05-15 09:07:15 -04:00
urlStream.pipe(fileStream);
return urlStream.resume(); // now we are ready to handle the data
} else {
logger.error({statusCode: res.statusCode, url, filePath}, "unexpected status code downloading url to cache");
// https://nodejs.org/api/http.html#http_class_http_clientrequest
// If you add a 'response' event handler, then you must consume
// the data from the response object, either by calling
// response.read() whenever there is a 'readable' event, or by
// adding a 'data' handler, or by calling the .resume()
// method. Until the data is consumed, the 'end' event will not
// fire. Also, until the data is read it will consume memory
// that can eventually lead to a 'process out of memory' error.
urlStream.resume(); // discard the data
return callbackOnce(new Error(`URL returned non-success status code: ${res.statusCode} ${url}`));
}
});
}
});