overleaf/services/clsi/app/js/UrlCache.js

/* eslint-disable
camelcase,
handle-callback-err,
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let UrlCache
const db = require('./db')
const dbQueue = require('./DbQueue')
const UrlFetcher = require('./UrlFetcher')
const Settings = require('settings-sharelatex')
const crypto = require('crypto')
const fs = require('fs')
const logger = require('logger-sharelatex')
const async = require('async')
const Metrics = require('./Metrics')
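
// Disk-backed cache of downloaded URLs. Each URL is stored under
// Settings.path.clsiCacheDir in a file named `<project_id>:<md5(url)>`, and the
// UrlCache database table records a lastModified timestamp per (project_id, url)
// that is used to decide whether the cached copy is still fresh.
//
// A minimal usage sketch (callers live elsewhere in the CLSI service, so the
// surrounding context here is only an assumption):
//
//   UrlCache.downloadUrlToFile(project_id, url, destPath, lastModified, (error) => {
//     // error is null when destPath now holds the cached/downloaded content
//   })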
module.exports = UrlCache = {
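  // Copy the cached copy of `url` to `destPath`, downloading it first if the
  // cache is missing or older than `lastModified`. If the copy fails, the URL's
  // database record is cleared so the next request re-downloads it.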
  downloadUrlToFile(project_id, url, destPath, lastModified, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    return UrlCache._ensureUrlIsInCache(
      project_id,
      url,
      lastModified,
      (error, pathToCachedUrl) => {
        if (error != null) {
          return callback(error)
        }
        return fs.copyFile(pathToCachedUrl, destPath, function (error) {
          if (error != null) {
            logger.error(
              { err: error, from: pathToCachedUrl, to: destPath },
              'error copying file from cache'
            )
            return UrlCache._clearUrlDetails(project_id, url, () =>
              callback(error)
            )
          } else {
            return callback(error)
          }
        })
      }
    )
  },
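  // Remove every cached URL (database record and file on disk) belonging to the
  // given project. Failures for individual URLs are logged but do not abort the
  // rest of the clean-up.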
  clearProject(project_id, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    return UrlCache._findAllUrlsInProject(project_id, function (error, urls) {
      if (error != null) {
        return callback(error)
      }
      logger.log(
        { project_id, url_count: urls.length },
        'clearing project URLs'
      )
      const jobs = Array.from(urls || []).map((url) =>
        ((url) => (callback) =>
          UrlCache._clearUrlFromCache(project_id, url, function (error) {
            if (error != null) {
              logger.error(
                { err: error, project_id, url },
                'error clearing project URL'
              )
            }
            return callback()
          }))(url)
      )
      return async.series(jobs, callback)
    })
  },
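  // Make sure a fresh copy of `url` exists in the on-disk cache and call back
  // with its path. Only re-downloads when _doesUrlNeedDownloading says the
  // cached copy is stale or missing.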
  _ensureUrlIsInCache(project_id, url, lastModified, callback) {
    if (callback == null) {
      callback = function (error, pathOnDisk) {}
    }
    if (lastModified != null) {
      // MYSQL only stores dates to an accuracy of a second but the incoming lastModified might have milliseconds.
      // So round down to seconds
      lastModified = new Date(Math.floor(lastModified.getTime() / 1000) * 1000)
    }
    return UrlCache._doesUrlNeedDownloading(
      project_id,
      url,
      lastModified,
      (error, needsDownloading) => {
        if (error != null) {
          return callback(error)
        }
        if (needsDownloading) {
          logger.log({ url, lastModified }, 'downloading URL')
          return UrlFetcher.pipeUrlToFileWithRetry(
            url,
            UrlCache._cacheFilePathForUrl(project_id, url),
            (error) => {
              if (error != null) {
                return callback(error)
              }
              return UrlCache._updateOrCreateUrlDetails(
                project_id,
                url,
                lastModified,
                (error) => {
                  if (error != null) {
                    return callback(error)
                  }
                  return callback(
                    null,
                    UrlCache._cacheFilePathForUrl(project_id, url)
                  )
                }
              )
            }
          )
        } else {
          logger.log({ url, lastModified }, 'URL is up to date in cache')
          return callback(null, UrlCache._cacheFilePathForUrl(project_id, url))
        }
      }
    )
  },
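  // Decide whether `url` must be (re-)downloaded: always download when no
  // lastModified is supplied or nothing is recorded for the URL, otherwise
  // download only if the recorded lastModified is older than the incoming one.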
  _doesUrlNeedDownloading(project_id, url, lastModified, callback) {
    if (callback == null) {
      callback = function (error, needsDownloading) {}
    }
    if (lastModified == null) {
      return callback(null, true)
    }
    return UrlCache._findUrlDetails(project_id, url, function (
      error,
      urlDetails
    ) {
      if (error != null) {
        return callback(error)
      }
      if (
        urlDetails == null ||
        urlDetails.lastModified == null ||
        urlDetails.lastModified.getTime() < lastModified.getTime()
      ) {
        return callback(null, true)
      } else {
        return callback(null, false)
      }
    })
  },
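  // Cache files are keyed by project_id plus an MD5 hash of the URL, giving a
  // fixed-length, filesystem-safe name regardless of how long the URL is.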
  _cacheFileNameForUrl(project_id, url) {
    return project_id + ':' + crypto.createHash('md5').update(url).digest('hex')
  },
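  // Absolute path of the cache file for `url`, inside the configured
  // clsiCacheDir.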
  _cacheFilePathForUrl(project_id, url) {
    return `${Settings.path.clsiCacheDir}/${UrlCache._cacheFileNameForUrl(
      project_id,
      url
    )}`
  },
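  // Forget a single cached URL: delete its database record first, then its
  // file on disk.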
  _clearUrlFromCache(project_id, url, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    return UrlCache._clearUrlDetails(project_id, url, function (error) {
      if (error != null) {
        return callback(error)
      }
      return UrlCache._deleteUrlCacheFromDisk(project_id, url, function (
        error
      ) {
        if (error != null) {
          return callback(error)
        }
        return callback(null)
      })
    })
  },
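  // Delete the cached file for `url`; a missing file (ENOENT) is not treated
  // as an error.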
  _deleteUrlCacheFromDisk(project_id, url, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    return fs.unlink(UrlCache._cacheFilePathForUrl(project_id, url), function (
      error
    ) {
      if (error != null && error.code !== 'ENOENT') {
        // no error if the file isn't present
        return callback(error)
      } else {
        return callback()
      }
    })
  },
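  // Look up the cache record for (project_id, url) through the shared dbQueue,
  // timing the query with Metrics.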
  _findUrlDetails(project_id, url, callback) {
    if (callback == null) {
      callback = function (error, urlDetails) {}
    }
    const timer = new Metrics.Timer('db-find-url-details')
    const job = (cb) =>
      db.UrlCache.findOne({ where: { url, project_id } })
        .then((urlDetails) => cb(null, urlDetails))
        .error(cb)
    dbQueue.queue.push(job, (error, urlDetails) => {
      timer.done()
      callback(error, urlDetails)
    })
  },
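  // Record (or update) the lastModified timestamp for (project_id, url) after
  // a successful download.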
  _updateOrCreateUrlDetails(project_id, url, lastModified, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    const timer = new Metrics.Timer('db-update-or-create-url-details')
    const job = (cb) =>
      db.UrlCache.findOrCreate({ where: { url, project_id } })
        .spread((urlDetails, created) =>
          urlDetails
            .update({ lastModified })
            .then(() => cb())
            .error(cb)
        )
        .error(cb)
    dbQueue.queue.push(job, (error) => {
      timer.done()
      callback(error)
    })
  },
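  // Delete the database record for (project_id, url), forcing the next request
  // for this URL to download it again.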
  _clearUrlDetails(project_id, url, callback) {
    if (callback == null) {
      callback = function (error) {}
    }
    const timer = new Metrics.Timer('db-clear-url-details')
    const job = (cb) =>
      db.UrlCache.destroy({ where: { url, project_id } })
        .then(() => cb(null))
        .error(cb)
    dbQueue.queue.push(job, (error) => {
      timer.done()
      callback(error)
    })
  },
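  // List all URLs that have cache records for the given project; used by
  // clearProject.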
  _findAllUrlsInProject(project_id, callback) {
    if (callback == null) {
      callback = function (error, urls) {}
    }
    const timer = new Metrics.Timer('db-find-urls-in-project')
    const job = (cb) =>
      db.UrlCache.findAll({ where: { project_id } })
        .then((urlEntries) =>
          cb(
            null,
            urlEntries.map((entry) => entry.url)
          )
        )
        .error(cb)
    dbQueue.queue.push(job, (err, urls) => {
      timer.done()
      callback(err, urls)
    })
  }
}