overleaf/services/document-updater/scripts/remove_deleted_docs.js
Brian Gough 805517e728 Merge pull request #5961 from overleaf/bg-remove-deleted-docs
[document-updater] remove deleted docs from redis

GitOrigin-RevId: ec9ad55d3c5cd9b55f56599de671068c00442f49
2021-12-01 09:03:47 +00:00

173 lines
4.3 KiB
JavaScript

// Project configuration and structured logger.
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
// Redis client created from the document-updater redis settings.
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
)
// Key-name builders for that redis store; this script uses
// keys.projectKey({ doc_id }) and keys.docLines({ doc_id }).
const keys = Settings.redis.documentupdater.key_schema
// NOTE(review): 'app/js/...' are non-relative module paths - presumably
// resolved via NODE_PATH or by running from the service root; confirm.
const ProjectFlusher = require('app/js/ProjectFlusher')
const RedisManager = require('app/js/RedisManager')
const util = require('util')
// Promise-returning wrapper around RedisManager.getDoc. The underlying
// callback receives several result values after the error; they are gathered
// into a single array so that callers can destructure the resolved value.
const getDoc = util.promisify(function (projectId, docId, done) {
  const collectValues = (err, ...values) => done(err, values)
  return RedisManager.getDoc(projectId, docId, collectValues)
})
// Promise-returning form of RedisManager.removeDocFromMemory.
// NOTE(review): the method is passed unbound, so this relies on it not using
// `this` - confirm against RedisManager.
const removeDocFromMemory = util.promisify(RedisManager.removeDocFromMemory)
const { MongoClient, ObjectId } = require('mongodb')
// The mongo connection is started at load time; everything that needs the
// client chains off this promise.
const clientPromise = MongoClient.connect(
Settings.mongo.url,
Settings.mongo.options
)
// Running counters, updated while docs are processed and included in the
// per-doc log entries.
const summary = { totalDocs: 0, deletedDocs: 0, skippedDocs: 0 }
// Holder for the mongo collection handles, filled in once the connection
// resolves.
const db = {}
// This handler is registered before the one at the bottom of the file that
// starts the scan, so db.docs and db.projects are set before the scan runs.
clientPromise.then(client => {
db.docs = client.db().collection('docs')
db.projects = client.db().collection('projects')
})
/**
 * Check each doc referenced by a batch of redis keys against the mongo `docs`
 * collection and hand the ones with no matching record to removeDeletedDoc.
 *
 * Docs are processed one at a time. A failure for one doc is logged and does
 * not stop the rest of the batch.
 *
 * @param {string[]} dockeys - redis keys; doc ids are extracted with
 *   ProjectFlusher._extractIds
 * @param {Object} options - passed through unchanged to removeDeletedDoc
 * @returns {Promise<void>}
 */
async function removeDeletedDocs(dockeys, options) {
  for (const docId of ProjectFlusher._extractIds(dockeys)) {
    summary.totalDocs++
    const matchingDocs = await db.docs.find({ _id: ObjectId(docId) }).count()
    if (matchingDocs) {
      // the doc still has a record in mongo, leave it alone
      continue
    }
    try {
      await removeDeletedDoc(docId, options)
    } catch (err) {
      logger.error({ docId, err }, 'error removing deleted doc')
    }
  }
}
/**
 * Remove one doc from redis, but only when its project no longer exists in
 * mongo.
 *
 * The project id is looked up in redis from the doc id. If the project is
 * still present in mongo the doc is left in place and counted as skipped,
 * with a different log message depending on whether the project's rootFolder
 * still references the doc id. Otherwise the doc is counted as deleted and,
 * unless this is a dry run, removed from redis.
 *
 * @param {string} docId - id of the doc whose redis data should be removed
 * @param {Object} options
 * @param {boolean} options.dryRun - when truthy, only log what would be removed
 * @returns {Promise<void>} resolves once the doc has been handled; rejects if
 *   a redis or mongo call fails, including the removal itself
 */
async function removeDeletedDoc(docId, options) {
  const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))
  const [
    docLines,
    version,
    ranges,
    pathname,
    projectHistoryId,
    unflushedTime,
    lastUpdatedAt,
    lastUpdatedBy,
  ] = await getDoc(projectId, docId)
  // Metadata shared by every log entry below. Only the lengths of the doc
  // lines and ranges are logged, not their content.
  const docInfo = {
    projectId,
    docId,
    docLinesBytes: docLines && docLines.length,
    version,
    rangesBytes: ranges && ranges.length,
    pathname,
    projectHistoryId,
    unflushedTime,
    lastUpdatedAt,
    lastUpdatedBy,
  }
  const project = await db.projects.findOne({ _id: ObjectId(projectId) })
  if (project) {
    // JSON.stringify returns undefined for a missing rootFolder; fall back to
    // an empty string so such a project is skipped instead of throwing.
    const projectJSON = JSON.stringify(project.rootFolder) || ''
    const containsDoc = projectJSON.includes(docId)
    logger.warn(
      docInfo,
      containsDoc
        ? 'refusing to delete doc, project contains docId'
        : 'refusing to delete doc, project still exists'
    )
    summary.skippedDocs++
    return
  }
  const status = 'projectDeleted'
  summary.deletedDocs++
  if (options.dryRun) {
    logger.info(
      { ...docInfo, status, summary },
      'dry run mode - would remove doc from redis'
    )
    return
  }
  // Wait for the removal: the success log must not be written before it has
  // happened, and a failure has to reach the caller's error handling.
  await removeDocFromMemory(projectId, docId)
  logger.info({ ...docInfo, status, summary }, 'removed doc from redis')
}
/**
 * Walk every redis key matching the doc-lines key pattern with SCAN and pass
 * each page of keys to removeDeletedDocs.
 *
 * @param {Object} options
 * @param {number} options.limit - COUNT hint given to each SCAN call
 * @param {boolean} options.dryRun - forwarded to removeDeletedDocs
 * @returns {Promise<void>} resolves after the scan cursor comes back as '0'
 */
async function findAndProcessDocs(options) {
  logger.info({ options }, 'removing deleted docs')
  let position = 0
  while (true) {
    const scanArgs = [
      position,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit,
    ]
    const [nextPosition, matchedKeys] = await rclient.scan(...scanArgs)
    await removeDeletedDocs(matchedKeys, options)
    if (nextPosition === '0') {
      // a returned cursor of '0' marks the end of the iteration
      return
    }
    position = nextPosition
  }
}
// Script entry point: once mongo is connected, scan redis and process the
// docs. Runs as a dry run unless the DRY_RUN environment variable is exactly
// 'false'.
clientPromise.then(client => {
  const runOptions = { limit: 1000, dryRun: process.env.DRY_RUN !== 'false' }
  const shutDown = () => {
    rclient.quit()
    client.close()
    console.log('DONE')
  }
  const abort = error => {
    console.error(error)
    process.exit(1)
  }
  findAndProcessDocs(runOptions).then(shutDown).catch(abort)
})