mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-14 20:40:17 -05:00
e7f040ecf7
[web] allow large broken docs to be deleted GitOrigin-RevId: 0322f55fcc5448ec1d6e6ee96056b80980bd3406
182 lines
5 KiB
JavaScript
182 lines
5 KiB
JavaScript
// recover docs from redis where there is no doc in mongo but the project exists
|
|
|
|
const minimist = require('minimist')
|
|
const { db, waitForDb, ObjectId } = require('../app/src/infrastructure/mongodb')
|
|
const ProjectEntityUpdateHandler = require('../app/src/Features/Project/ProjectEntityUpdateHandler')
|
|
const ProjectEntityRestoreHandler = require('../app/src/Features/Project/ProjectEntityRestoreHandler')
|
|
const RedisWrapper = require('@overleaf/redis-wrapper')
|
|
const Settings = require('@overleaf/settings')
|
|
const logger = require('@overleaf/logger')
|
|
// Command-line options, parsed once at load time; read by main() and processDoc().
const opts = parseArgs()
|
|
// Redis client created from the `web` redis settings; used for the key scan, the doc reads and the key deletions in this script.
const redis = RedisWrapper.createClient(Settings.redis.web)
|
|
|
|
/**
 * Parse the command-line flags for this script.
 *
 * Recognised flags:
 *   --commit            apply changes; without it the script only logs what
 *                       it would do (dry run)
 *   --delete-oversized  delete docs from redis when they exceed maxDocSize
 *                       (only acted on together with --commit)
 *
 * Both flags are declared as booleans so that minimist never consumes the
 * following argument as the flag's value, and so that `--flag=false` parses
 * to `false` instead of the truthy string 'false'.
 *
 * @returns {{commit: boolean, deleteOversized: boolean, maxDocSize: number}}
 *   maxDocSize is fixed at 2 * 1024 * 1024.
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2), {
    boolean: ['commit', 'delete-oversized'],
  })
  return {
    commit: args.commit,
    deleteOversized: args['delete-oversized'],
    maxDocSize: 2 * 1024 * 1024,
  }
}
|
|
|
|
/**
 * Extract the 24-character lowercase-hex object id from a redis key such as
 * `doclines:{<id>}`. The braces around the id are optional.
 *
 * @param {string} s - redis key
 * @returns {string} the first 24-hex-digit id that follows a ':' in the key
 * @throws {Error} if the key does not contain such an id
 */
function extractObjectId(s) {
  const m = s.match(/:\{?([0-9a-f]{24})\}?/)
  if (!m) {
    // Fail with the offending key in the message instead of an opaque
    // "cannot read properties of null" TypeError.
    throw new Error(`cannot extract object id from redis key: ${s}`)
  }
  return m[1]
}
|
|
|
|
/**
 * Script entry point.
 *
 * Waits for the mongo connection, then walks every redis key matching
 * `doclines:{*}` with SCAN (batches of up to COUNT 1000) and runs
 * processDoc, one doc at a time, on the id extracted from each key.
 * When --commit was not given, a reminder that this was a dry run is
 * printed at the end.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await waitForDb()
  // NOTE(review): this message says "removing deleted docs" although the
  // script recovers docs - confirm whether the wording is intentional.
  logger.info({ opts }, 'removing deleted docs')

  let cursor = 0
  for (;;) {
    const [nextCursor, keys] = await redis.scan(
      cursor,
      'MATCH',
      'doclines:{*}',
      'COUNT',
      1000
    )
    // Resolve the ids for the whole batch before processing any of them.
    const batchIds = keys.map(key => extractObjectId(key))
    for (const id of batchIds) {
      await processDoc(id)
    }
    cursor = nextCursor
    // The scan is complete once the returned cursor is the string '0'.
    if (cursor === '0') {
      break
    }
  }

  if (opts.commit) {
    return
  }
  console.log('This was a dry run. Re-run with --commit to apply changes')
}
|
|
|
|
/**
 * Try to recover a single doc from redis into mongo.
 *
 * The doc is skipped (nothing is written) when:
 *   - a doc with this id already exists in mongo,
 *   - redis holds no project id for it,
 *   - the project is not present in mongo,
 *   - redis holds no usable lines for it,
 *   - its size exceeds opts.maxDocSize; in that case its redis keys are
 *     deleted instead, but only when both --delete-oversized and --commit
 *     are set.
 *
 * Otherwise, when --commit is set, the lines and ranges are added as a new
 * doc in the project's first root folder, under the name returned by
 * ProjectEntityRestoreHandler.generateRestoredName, and the doc's redis keys
 * are deleted. Without --commit only the log lines are emitted.
 *
 * Errors raised by the restore or delete steps are logged and not rethrown,
 * so one failing doc does not stop the scan.
 *
 * @param {string} docId - doc id extracted from a `doclines:{...}` key
 * @returns {Promise<void>}
 */
async function processDoc(docId) {
  // check if the doc is in mongo.. if so ignore it
  const docCount = await db.docs.find({ _id: ObjectId(docId) }).count()
  if (docCount > 0) {
    logger.debug({ docId }, 'doc is present in mongo - no recovery needed')
    return
  }

  // get the doc from redis and check if it has a project id
  const doc = await getDoc(docId)
  const projectId = doc.projectId
  if (!projectId) {
    logger.warn(
      { docId, doc },
      'projectId not available in redis, cannot restore - skipping'
    )
    // we could delete the document in redis here since we have no way to recover it
    return
  }

  // check that the project is in mongo; if not, only report it (nothing is
  // deleted from redis here)
  const project = await db.projects.findOne({ _id: ObjectId(projectId) })
  if (!project) {
    logger.warn(
      { docId },
      'project not present in mongo - could remove doc from redis'
    )
    return
  }

  // the lines can be missing or not an array (for example when the doclines
  // key no longer holds a value); there is nothing to restore in that case
  // and computing the size below would throw
  if (!Array.isArray(doc.lines)) {
    logger.warn(
      { docId, projectId },
      'doc lines not available in redis, cannot restore - skipping'
    )
    return
  }

  // if the doc is too big we will need to convert it to a file, skip it for now
  // or delete the doc if the --delete-oversized option is used
  // (each line counts as its length plus one for the line separator)
  const size = doc.lines.reduce((sum, line) => sum + line.length + 1, 0)
  if (size > opts.maxDocSize) {
    logger.warn(
      { docId, projectId, size },
      'doc that exceeds max size, cannot restore'
    )
    if (opts.deleteOversized) {
      logger.info(
        { docId, projectId, size, commit: opts.commit },
        'forcing delete of large doc'
      )
      if (opts.commit) {
        try {
          await deleteDocFromRedis(projectId, docId)
        } catch (err) {
          logger.error(
            { docId, projectId, deleteErr: err },
            'error deleting doc from redis'
          )
        }
      }
    }
    return
  }

  // now we have a doc content from redis, in a project where the doc has been deleted
  const restoredName = ProjectEntityRestoreHandler.generateRestoredName(
    doc.name || 'unknown',
    'recovered'
  )
  logger.info(
    { docId, projectId, restoredName, commit: opts.commit },
    'recovering doc from redis to mongo'
  )

  if (opts.commit) {
    const folderId = project.rootFolder[0]._id
    try {
      await ProjectEntityUpdateHandler.promises.addDocWithRanges(
        projectId,
        folderId,
        restoredName,
        doc.lines,
        doc.ranges,
        null
      )
      // only clear redis once the doc has been written to the project
      await deleteDocFromRedis(projectId, docId)
    } catch (err) {
      logger.error(
        { docId, projectId, restoreErr: err },
        'error restoring doc from redis to mongo'
      )
    }
  }
}
|
|
|
|
/**
 * Load the redis-side copy of a doc with a single MGET of its ProjectId,
 * doclines, Ranges and Pathname keys.
 *
 * @param {string} docId
 * @returns {Promise<{projectId: (string|null), id: string, lines: *, ranges: Object, name: string}>}
 *   lines is the JSON-decoded doclines value; ranges is the JSON-decoded
 *   Ranges value, or {} when that value is empty; name is the last path
 *   segment of Pathname, or 'unnamed' when there is none.
 */
async function getDoc(docId) {
  const keys = ['ProjectId', 'doclines', 'Ranges', 'Pathname'].map(
    prefix => `${prefix}:{${docId}}`
  )
  const [projectId, rawLines, rawRanges, pathname] = await redis.mget(...keys)

  // keep only the file name part of the stored path
  const basename = pathname?.split('/').pop()
  const lines = JSON.parse(rawLines)
  const ranges = rawRanges ? JSON.parse(rawRanges) : {}

  return {
    projectId,
    id: docId,
    lines,
    ranges,
    name: basename || 'unnamed',
  }
}
|
|
|
|
/**
 * Remove every redis key this script knows about for a doc, then remove the
 * doc id from the project's `DocsIn:{projectId}` set.
 *
 * @param {string} projectId - id of the project the doc belongs to
 * @param {string} docId - id of the doc to remove
 * @returns {Promise<void>}
 */
async function deleteDocFromRedis(projectId, docId) {
  // All per-doc keys share the `{docId}` tag, so they are deleted in one DEL.
  const docKeyPrefixes = [
    'Blocking',
    'doclines',
    'DocOps',
    'DocVersion',
    'DocHash',
    'ProjectId',
    'Ranges',
    'UnflushedTime',
    'Pathname',
    'ProjectHistoryId',
    'ProjectHistoryType',
    'PendingUpdates',
    'lastUpdatedAt',
    'lastUpdatedBy',
  ]
  await redis.del(...docKeyPrefixes.map(prefix => `${prefix}:{${docId}}`))
  // The member to remove from the project's set is the doc id; passing the
  // project id here would leave the doc listed in the set.
  await redis.srem(`DocsIn:{${projectId}}`, docId)
}
|
|
|
|
/**
 * Run main() and turn its outcome into the process exit code:
 * 0 when it resolves, 1 (after printing the error) when it rejects.
 */
async function runScript() {
  try {
    await main()
  } catch (err) {
    console.error(err)
    process.exit(1)
  }
  process.exit(0)
}

runScript()
|