From 44d0a8d16222878b7d0f17d277bdeddfc1ebcdd8 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Wed, 6 Oct 2021 10:12:09 +0200 Subject: [PATCH] Merge pull request #4650 from overleaf/jpa-in-memory-last-access-time [perf] store the last access time of a project in memory GitOrigin-RevId: 16e2bee28f58eced18f4c3ec5571ea9d10805cbb --- services/clsi/app.js | 8 +- .../clsi/app/js/ProjectPersistenceManager.js | 104 ++++++++++-------- 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/services/clsi/app.js b/services/clsi/app.js index b1b3b2004b..fbb2d4eba6 100644 --- a/services/clsi/app.js +++ b/services/clsi/app.js @@ -5,7 +5,6 @@ * DS207: Consider shorter variations of null checks * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md */ -const tenMinutes = 10 * 60 * 1000 const Metrics = require('@overleaf/metrics') Metrics.initialize('clsi') @@ -32,6 +31,7 @@ const OutputCacheManager = require('./app/js/OutputCacheManager') const ContentCacheManager = require('./app/js/ContentCacheManager') require('./app/js/db').sync() +ProjectPersistenceManager.init() const express = require('express') const bodyParser = require('body-parser') @@ -420,12 +420,6 @@ if (!module.parent) { module.exports = app -setInterval(() => { - ProjectPersistenceManager.refreshExpiryTimeout(() => { - ProjectPersistenceManager.clearExpiredProjects() - }) -}, tenMinutes) - function __guard__(value, transform) { return typeof value !== 'undefined' && value !== null ? transform(value) diff --git a/services/clsi/app/js/ProjectPersistenceManager.js b/services/clsi/app/js/ProjectPersistenceManager.js index c4abb69b6e..d79688fd1f 100644 --- a/services/clsi/app/js/ProjectPersistenceManager.js +++ b/services/clsi/app/js/ProjectPersistenceManager.js @@ -12,17 +12,19 @@ * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md */ let ProjectPersistenceManager -const Metrics = require('./Metrics') const UrlCache = require('./UrlCache') const CompileManager = require('./CompileManager') -const db = require('./db') -const dbQueue = require('./DbQueue') const async = require('async') const logger = require('logger-sharelatex') const oneDay = 24 * 60 * 60 * 1000 const Settings = require('@overleaf/settings') const diskusage = require('diskusage') const { callbackify } = require('util') +const Path = require('path') +const fs = require('fs') + +// projectId -> timestamp mapping. +const LAST_ACCESS = new Map() async function refreshExpiryTimeout() { const paths = [ @@ -61,26 +63,50 @@ module.exports = ProjectPersistenceManager = { }, refreshExpiryTimeout: callbackify(refreshExpiryTimeout), - markProjectAsJustAccessed(project_id, callback) { - if (callback == null) { - callback = function (error) {} - } - const timer = new Metrics.Timer('db-bump-last-accessed') - const job = cb => - db.Project.findOrCreate({ where: { project_id } }) - .spread((project, created) => - project - .update({ lastAccessed: new Date() }) - .then(() => cb()) - .error(cb) - ) - .error(cb) - dbQueue.queue.push(job, error => { - timer.done() - callback(error) + + init() { + fs.readdir(Settings.path.compilesDir, (err, dirs) => { + if (err) { + logger.warn({ err }, 'cannot get project listing') + dirs = [] + } + + async.eachLimit( + dirs, + 10, + (projectAndUserId, cb) => { + const compileDir = Path.join( + Settings.path.compilesDir, + projectAndUserId + ) + const projectId = projectAndUserId.slice(0, 24) + fs.stat(compileDir, (err, stats) => { + if (err) { + // Schedule for immediate cleanup + LAST_ACCESS.set(projectId, 0) + } else { + // Cleanup eventually. + LAST_ACCESS.set(projectId, stats.mtime.getTime()) + } + cb() + }) + }, + () => { + setInterval(() => { + ProjectPersistenceManager.refreshExpiryTimeout(() => { + ProjectPersistenceManager.clearExpiredProjects() + }) + }, 10 * 60 * 1000) + } + ) }) }, + markProjectAsJustAccessed(project_id, callback) { + LAST_ACCESS.set(project_id, Date.now()) + callback() + }, + clearExpiredProjects(callback) { if (callback == null) { callback = function (error) {} @@ -166,38 +192,20 @@ module.exports = ProjectPersistenceManager = { }, _clearProjectFromDatabase(project_id, callback) { - if (callback == null) { - callback = function (error) {} - } - logger.log({ project_id }, 'clearing project from database') - const job = cb => - db.Project.destroy({ where: { project_id } }) - .then(() => cb()) - .error(cb) - return dbQueue.queue.push(job, callback) + LAST_ACCESS.delete(project_id) + callback() }, _findExpiredProjectIds(callback) { - if (callback == null) { - callback = function (error, project_ids) {} + const expiredFrom = Date.now() - ProjectPersistenceManager.EXPIRY_TIMEOUT + const expiredProjectsIds = [] + for (const [projectId, lastAccess] of LAST_ACCESS.entries()) { + if (lastAccess < expiredFrom) { + expiredProjectsIds.push(projectId) + } } - const job = function (cb) { - const keepProjectsFrom = new Date( - Date.now() - ProjectPersistenceManager.EXPIRY_TIMEOUT - ) - const q = {} - q[db.op.lt] = keepProjectsFrom - return db.Project.findAll({ where: { lastAccessed: q } }) - .then(projects => - cb( - null, - projects.map(project => project.project_id) - ) - ) - .error(cb) - } - - return dbQueue.queue.push(job, callback) + // ^ may be a fairly busy loop, continue detached. + setTimeout(() => callback(null, expiredProjectsIds), 0) }, }