mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-09 16:09:08 +00:00
Merge pull request #5961 from overleaf/bg-remove-deleted-docs
[document-updater] remove deleted docs from redis GitOrigin-RevId: ec9ad55d3c5cd9b55f56599de671068c00442f49
This commit is contained in:
parent
f6fc3d468c
commit
805517e728
1 changed files with 173 additions and 0 deletions
173
services/document-updater/scripts/remove_deleted_docs.js
Normal file
173
services/document-updater/scripts/remove_deleted_docs.js
Normal file
|
@ -0,0 +1,173 @@
|
|||
const Settings = require('@overleaf/settings')
|
||||
const logger = require('@overleaf/logger')
|
||||
const rclient = require('@overleaf/redis-wrapper').createClient(
|
||||
Settings.redis.documentupdater
|
||||
)
|
||||
const keys = Settings.redis.documentupdater.key_schema
|
||||
const ProjectFlusher = require('app/js/ProjectFlusher')
|
||||
const RedisManager = require('app/js/RedisManager')
|
||||
const util = require('util')
|
||||
// Promise-returning wrapper around RedisManager.getDoc.
// RedisManager.getDoc calls back with several result values, while
// util.promisify only resolves with the first one; the adapter below gathers
// all of them into a single array so callers can destructure the full result.
const getDoc = util.promisify(function (projectId, docId, callback) {
  RedisManager.getDoc(projectId, docId, function (err, ...results) {
    callback(err, results)
  })
})

// Promise-returning wrapper around RedisManager.removeDocFromMemory.
const removeDocFromMemory = util.promisify(RedisManager.removeDocFromMemory)
|
||||
const { MongoClient, ObjectId } = require('mongodb')
|
||||
|
||||
// Single mongo connection shared by the whole script.
const clientPromise = MongoClient.connect(
  Settings.mongo.url,
  Settings.mongo.options
)

// Running counters for the current run; they are included in the log output
// of each processed doc.
const summary = { totalDocs: 0, deletedDocs: 0, skippedDocs: 0 }

// Collection handles, filled in once the mongo connection has been
// established.
const db = {}
clientPromise.then(function (client) {
  Object.assign(db, {
    docs: client.db().collection('docs'),
    projects: client.db().collection('projects'),
  })
})
|
||||
|
||||
/**
 * Check every doc referenced by a batch of redis keys against the mongo
 * `docs` collection and hand the ones with no mongo record to
 * removeDeletedDoc.
 *
 * A failure while handling one doc is logged and does not stop the rest of
 * the batch.
 *
 * @param {string[]} dockeys - redis keys; the doc ids are extracted from them
 *   with ProjectFlusher._extractIds
 * @param {Object} options - passed through unchanged to removeDeletedDoc
 * @returns {Promise<void>}
 */
async function removeDeletedDocs(dockeys, options) {
  for (const docId of ProjectFlusher._extractIds(dockeys)) {
    summary.totalDocs++
    const matchingDocs = await db.docs.find({ _id: ObjectId(docId) }).count()
    if (matchingDocs) {
      // the doc still exists in mongo, leave it alone
      continue
    }
    try {
      await removeDeletedDoc(docId, options)
    } catch (err) {
      logger.error({ docId, err }, 'error removing deleted doc')
    }
  }
}
|
||||
|
||||
/**
 * Remove a single doc from redis, but only when its project no longer exists
 * in mongo.
 *
 * The project id is looked up in redis from the doc id. If the project is
 * still present in mongo the doc is left untouched and counted in
 * `summary.skippedDocs` (with a warning that says whether the project's
 * rootFolder still mentions the doc id). Otherwise the doc is removed from
 * redis, or, in dry-run mode, only reported.
 *
 * `summary.deletedDocs` is incremented for every doc reported in dry-run mode
 * and for every doc that was actually removed.
 *
 * @param {string} docId
 * @param {Object} options
 * @param {boolean} options.dryRun - when truthy, log what would be removed
 *   without touching redis
 * @returns {Promise<void>} resolves once the doc has been skipped, reported
 *   or removed; rejects if any redis/mongo call fails, including the removal
 */
async function removeDeletedDoc(docId, options) {
  const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))

  const [
    docLines,
    version,
    ranges,
    pathname,
    projectHistoryId,
    unflushedTime,
    lastUpdatedAt,
    lastUpdatedBy,
  ] = await getDoc(projectId, docId)

  const project = await db.projects.findOne({ _id: ObjectId(projectId) })

  // Fields shared by every log line below. Only the sizes of the lines and
  // ranges are logged, not their content.
  const docInfo = {
    projectId,
    docId,
    docLinesBytes: docLines && docLines.length,
    version,
    rangesBytes: ranges && ranges.length,
    pathname,
    projectHistoryId,
    unflushedTime,
    lastUpdatedAt,
    lastUpdatedBy,
  }

  if (project) {
    // Never delete while the project exists; the message records whether the
    // serialized rootFolder still contains the doc id.
    const containsDoc = JSON.stringify(project.rootFolder).includes(docId)
    logger.warn(
      docInfo,
      containsDoc
        ? 'refusing to delete doc, project contains docId'
        : 'refusing to delete doc, project still exists'
    )
    summary.skippedDocs++
    return
  }

  const status = 'projectDeleted'

  if (options.dryRun) {
    summary.deletedDocs++
    logger.info(
      { ...docInfo, status, summary },
      'dry run mode - would remove doc from redis'
    )
    return
  }

  // Wait for the removal to finish: a failure must reach the caller instead
  // of becoming an unhandled rejection, and the success message and counter
  // must only be updated once the doc is really gone.
  await removeDocFromMemory(projectId, docId)
  summary.deletedDocs++
  logger.info({ ...docInfo, status, summary }, 'removed doc from redis')
}
|
||||
|
||||
/**
 * Walk over all doc-lines keys in redis with SCAN and pass each batch of
 * keys to removeDeletedDocs.
 *
 * @param {Object} options
 * @param {number} options.limit - value passed as the SCAN COUNT argument
 * @param {boolean} options.dryRun - forwarded to removeDeletedDocs
 * @returns {Promise<void>} resolves when the scan cursor has wrapped around
 */
async function findAndProcessDocs(options) {
  logger.info({ options }, 'removing deleted docs')
  let cursor = 0
  for (;;) {
    const scanResult = await rclient.scan(
      cursor,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit
    )
    const [nextCursor, batchKeys] = scanResult
    await removeDeletedDocs(batchKeys, options)
    cursor = nextCursor
    // the scan is complete once the cursor comes back as the string '0'
    if (cursor === '0') {
      break
    }
  }
}
|
||||
|
||||
// Script entry point: wait for the mongo connection, process all docs, then
// close the redis and mongo connections.
// Runs in dry-run mode unless the DRY_RUN environment variable is exactly
// 'false'.
// A single catch at the end of the chain handles both a failed mongo
// connection and a failure during processing: the error is printed and the
// process exits with status 1.
clientPromise
  .then(async client => {
    await findAndProcessDocs({
      limit: 1000,
      dryRun: process.env.DRY_RUN !== 'false',
    })
    rclient.quit()
    client.close()
    console.log('DONE')
  })
  .catch(function (error) {
    console.error(error)
    process.exit(1)
  })
|
Loading…
Add table
Reference in a new issue