2020-02-19 06:14:28 -05:00
|
|
|
/* eslint-disable
|
|
|
|
camelcase,
|
|
|
|
handle-callback-err,
|
|
|
|
no-return-assign,
|
|
|
|
*/
|
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Fix any style issues and re-enable lint.
|
2020-02-19 06:14:14 -05:00
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS101: Remove unnecessary use of Array.from
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
2020-02-19 06:14:37 -05:00
|
|
|
let UrlCache
|
|
|
|
const db = require('./db')
|
|
|
|
const dbQueue = require('./DbQueue')
|
|
|
|
const UrlFetcher = require('./UrlFetcher')
|
2021-07-12 12:47:21 -04:00
|
|
|
const Settings = require('@overleaf/settings')
|
2020-02-19 06:14:37 -05:00
|
|
|
const crypto = require('crypto')
|
|
|
|
const fs = require('fs')
|
|
|
|
const logger = require('logger-sharelatex')
|
|
|
|
const async = require('async')
|
2021-06-14 08:03:02 -04:00
|
|
|
const Metrics = require('./Metrics')
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
module.exports = UrlCache = {
|
|
|
|
downloadUrlToFile(project_id, url, destPath, lastModified, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
|
|
|
return UrlCache._ensureUrlIsInCache(
|
|
|
|
project_id,
|
|
|
|
url,
|
|
|
|
lastModified,
|
|
|
|
(error, pathToCachedUrl) => {
|
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2021-05-17 05:54:11 -04:00
|
|
|
return fs.copyFile(pathToCachedUrl, destPath, function (error) {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (error != null) {
|
2021-05-14 10:49:20 -04:00
|
|
|
logger.error(
|
|
|
|
{ err: error, from: pathToCachedUrl, to: destPath },
|
|
|
|
'error copying file from cache'
|
|
|
|
)
|
2020-02-19 06:14:37 -05:00
|
|
|
return UrlCache._clearUrlDetails(project_id, url, () =>
|
|
|
|
callback(error)
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
)
|
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
clearProject(project_id, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2020-08-10 12:01:11 -04:00
|
|
|
return UrlCache._findAllUrlsInProject(project_id, function (error, urls) {
|
2020-02-19 06:14:37 -05:00
|
|
|
logger.log(
|
|
|
|
{ project_id, url_count: urls.length },
|
|
|
|
'clearing project URLs'
|
|
|
|
)
|
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
const jobs = Array.from(urls || []).map(url =>
|
|
|
|
(
|
|
|
|
url => callback =>
|
|
|
|
UrlCache._clearUrlFromCache(project_id, url, function (error) {
|
|
|
|
if (error != null) {
|
|
|
|
logger.error(
|
|
|
|
{ err: error, project_id, url },
|
|
|
|
'error clearing project URL'
|
|
|
|
)
|
|
|
|
}
|
|
|
|
return callback()
|
|
|
|
})
|
|
|
|
)(url)
|
2020-02-19 06:14:37 -05:00
|
|
|
)
|
|
|
|
return async.series(jobs, callback)
|
|
|
|
})
|
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_ensureUrlIsInCache(project_id, url, lastModified, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error, pathOnDisk) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
|
|
|
if (lastModified != null) {
|
|
|
|
// MYSQL only stores dates to an accuracy of a second but the incoming lastModified might have milliseconds.
|
|
|
|
// So round down to seconds
|
|
|
|
lastModified = new Date(Math.floor(lastModified.getTime() / 1000) * 1000)
|
|
|
|
}
|
|
|
|
return UrlCache._doesUrlNeedDownloading(
|
|
|
|
project_id,
|
|
|
|
url,
|
|
|
|
lastModified,
|
|
|
|
(error, needsDownloading) => {
|
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
if (needsDownloading) {
|
|
|
|
logger.log({ url, lastModified }, 'downloading URL')
|
2020-05-14 08:09:57 -04:00
|
|
|
return UrlFetcher.pipeUrlToFileWithRetry(
|
2020-02-19 06:14:37 -05:00
|
|
|
url,
|
|
|
|
UrlCache._cacheFilePathForUrl(project_id, url),
|
2021-07-13 07:04:48 -04:00
|
|
|
error => {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
return UrlCache._updateOrCreateUrlDetails(
|
|
|
|
project_id,
|
|
|
|
url,
|
|
|
|
lastModified,
|
2021-07-13 07:04:48 -04:00
|
|
|
error => {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
return callback(
|
|
|
|
null,
|
|
|
|
UrlCache._cacheFilePathForUrl(project_id, url)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
)
|
|
|
|
}
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
logger.log({ url, lastModified }, 'URL is up to date in cache')
|
|
|
|
return callback(null, UrlCache._cacheFilePathForUrl(project_id, url))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
)
|
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_doesUrlNeedDownloading(project_id, url, lastModified, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error, needsDownloading) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
|
|
|
if (lastModified == null) {
|
|
|
|
return callback(null, true)
|
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
return UrlCache._findUrlDetails(
|
|
|
|
project_id,
|
|
|
|
url,
|
|
|
|
function (error, urlDetails) {
|
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
if (
|
|
|
|
urlDetails == null ||
|
|
|
|
urlDetails.lastModified == null ||
|
|
|
|
urlDetails.lastModified.getTime() < lastModified.getTime()
|
|
|
|
) {
|
|
|
|
return callback(null, true)
|
|
|
|
} else {
|
|
|
|
return callback(null, false)
|
|
|
|
}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
)
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_cacheFileNameForUrl(project_id, url) {
|
2020-08-10 12:01:11 -04:00
|
|
|
return project_id + ':' + crypto.createHash('md5').update(url).digest('hex')
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_cacheFilePathForUrl(project_id, url) {
|
|
|
|
return `${Settings.path.clsiCacheDir}/${UrlCache._cacheFileNameForUrl(
|
|
|
|
project_id,
|
|
|
|
url
|
|
|
|
)}`
|
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_clearUrlFromCache(project_id, url, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2020-08-10 12:01:11 -04:00
|
|
|
return UrlCache._clearUrlDetails(project_id, url, function (error) {
|
2020-02-19 06:14:37 -05:00
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
return UrlCache._deleteUrlCacheFromDisk(
|
|
|
|
project_id,
|
|
|
|
url,
|
|
|
|
function (error) {
|
|
|
|
if (error != null) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
return callback(null)
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
)
|
2020-02-19 06:14:37 -05:00
|
|
|
})
|
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_deleteUrlCacheFromDisk(project_id, url, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
return fs.unlink(
|
|
|
|
UrlCache._cacheFilePathForUrl(project_id, url),
|
|
|
|
function (error) {
|
|
|
|
if (error != null && error.code !== 'ENOENT') {
|
|
|
|
// no error if the file isn't present
|
|
|
|
return callback(error)
|
|
|
|
} else {
|
|
|
|
return callback()
|
|
|
|
}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-07-13 07:04:48 -04:00
|
|
|
)
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_findUrlDetails(project_id, url, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error, urlDetails) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-06-14 08:03:02 -04:00
|
|
|
const timer = new Metrics.Timer('db-find-url-details')
|
2021-07-13 07:04:48 -04:00
|
|
|
const job = cb =>
|
2020-03-16 11:31:02 -04:00
|
|
|
db.UrlCache.findOne({ where: { url, project_id } })
|
2021-07-13 07:04:48 -04:00
|
|
|
.then(urlDetails => cb(null, urlDetails))
|
2020-02-19 06:14:37 -05:00
|
|
|
.error(cb)
|
2021-06-14 08:03:02 -04:00
|
|
|
dbQueue.queue.push(job, (error, urlDetails) => {
|
|
|
|
timer.done()
|
|
|
|
callback(error, urlDetails)
|
|
|
|
})
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_updateOrCreateUrlDetails(project_id, url, lastModified, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-06-14 08:03:02 -04:00
|
|
|
const timer = new Metrics.Timer('db-update-or-create-url-details')
|
2021-07-13 07:04:48 -04:00
|
|
|
const job = cb =>
|
2020-02-19 06:14:37 -05:00
|
|
|
db.UrlCache.findOrCreate({ where: { url, project_id } })
|
|
|
|
.spread((urlDetails, created) =>
|
|
|
|
urlDetails
|
2020-03-16 11:31:02 -04:00
|
|
|
.update({ lastModified })
|
2020-02-19 06:14:37 -05:00
|
|
|
.then(() => cb())
|
|
|
|
.error(cb)
|
|
|
|
)
|
|
|
|
.error(cb)
|
2021-07-13 07:04:48 -04:00
|
|
|
dbQueue.queue.push(job, error => {
|
2021-06-14 08:03:02 -04:00
|
|
|
timer.done()
|
|
|
|
callback(error)
|
|
|
|
})
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2018-07-30 11:22:04 -04:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_clearUrlDetails(project_id, url, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-06-14 08:03:02 -04:00
|
|
|
const timer = new Metrics.Timer('db-clear-url-details')
|
2021-07-13 07:04:48 -04:00
|
|
|
const job = cb =>
|
2020-02-19 06:14:37 -05:00
|
|
|
db.UrlCache.destroy({ where: { url, project_id } })
|
|
|
|
.then(() => cb(null))
|
|
|
|
.error(cb)
|
2021-07-13 07:04:48 -04:00
|
|
|
dbQueue.queue.push(job, error => {
|
2021-06-14 08:03:02 -04:00
|
|
|
timer.done()
|
|
|
|
callback(error)
|
|
|
|
})
|
2020-02-19 06:14:37 -05:00
|
|
|
},
|
2014-02-12 12:27:43 -05:00
|
|
|
|
2020-02-19 06:14:37 -05:00
|
|
|
_findAllUrlsInProject(project_id, callback) {
|
|
|
|
if (callback == null) {
|
2020-08-10 12:01:11 -04:00
|
|
|
callback = function (error, urls) {}
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|
2021-06-14 08:03:02 -04:00
|
|
|
const timer = new Metrics.Timer('db-find-urls-in-project')
|
2021-07-13 07:04:48 -04:00
|
|
|
const job = cb =>
|
2020-02-19 06:14:37 -05:00
|
|
|
db.UrlCache.findAll({ where: { project_id } })
|
2021-07-13 07:04:48 -04:00
|
|
|
.then(urlEntries =>
|
2020-02-19 06:14:37 -05:00
|
|
|
cb(
|
|
|
|
null,
|
2021-07-13 07:04:48 -04:00
|
|
|
urlEntries.map(entry => entry.url)
|
2020-02-19 06:14:37 -05:00
|
|
|
)
|
|
|
|
)
|
|
|
|
.error(cb)
|
2021-06-14 08:03:02 -04:00
|
|
|
dbQueue.queue.push(job, (err, urls) => {
|
|
|
|
timer.done()
|
|
|
|
callback(err, urls)
|
|
|
|
})
|
2021-07-13 07:04:48 -04:00
|
|
|
},
|
2020-02-19 06:14:37 -05:00
|
|
|
}
|