Mirror of https://github.com/overleaf/overleaf.git (synced 2024-11-21 20:47:08 -05:00)
Merge pull request #5957 from overleaf/em-fix-oversized-docs

Add script for fixing oversized docs

GitOrigin-RevId: 513ce326884e00530182d3272a9be1134e1fb1f5
parent 7a0b40a4bf
commit b9c8627d6a

1 changed file with 160 additions and 0 deletions
services/web/scripts/fix_oversized_docs.js (new file, 160 lines)

@@ -0,0 +1,160 @@
const fs = require('fs')
const minimist = require('minimist')
const { waitForDb, ObjectId } = require('../app/src/infrastructure/mongodb')
const DocstoreManager = require('../app/src/Features/Docstore/DocstoreManager')
const FileStoreHandler = require('../app/src/Features/FileStore/FileStoreHandler')
const FileWriter = require('../app/src/infrastructure/FileWriter')
const ProjectEntityMongoUpdateHandler = require('../app/src/Features/Project/ProjectEntityMongoUpdateHandler')
const RedisWrapper = require('@overleaf/redis-wrapper')
const Settings = require('@overleaf/settings')

const opts = parseArgs()
const redis = RedisWrapper.createClient(Settings.redis.web)

function parseArgs() {
  const args = minimist(process.argv.slice(2), {
    boolean: ['commit', 'ignore-ranges'],
  })

  const projectIds = args._
  if (projectIds.length === 0) {
    console.log(`Usage: ${process.argv[1]} [OPTS] PROJECT_ID

Options:
    --commit           Actually convert oversized docs to binary files
    --max-doc-size     Size over which docs are converted to binary files
    --ignore-ranges    Convert docs even if they contain ranges
`)
    process.exit(0)
  }

  const commit = args.commit
  const ignoreRanges = args['ignore-ranges']
  // Default threshold: 2 MiB
  const maxDocSize = args['max-doc-size']
    ? parseInt(args['max-doc-size'], 10)
    : 2 * 1024 * 1024

  return { projectIds, commit, ignoreRanges, maxDocSize }
}

async function main() {
  await waitForDb()
  for (const projectId of opts.projectIds) {
    await processProject(projectId)
  }
  if (!opts.commit) {
    console.log('This was a dry run. Re-run with --commit to apply changes')
  }
}

async function processProject(projectId) {
  const docIds = await getDocIds(projectId)
  for (const docId of docIds) {
    await processDoc(projectId, docId)
  }
}

async function processDoc(projectId, docId) {
  const doc = await getDocFromRedis(docId)
  // Approximate size in bytes: each line's length plus one newline character
  const size = doc.lines.reduce((sum, line) => sum + line.length + 1, 0)
  if (size > opts.maxDocSize) {
    if (
      !opts.ignoreRanges &&
      ((doc.ranges.comments && doc.ranges.comments.length > 0) ||
        (doc.ranges.changes && doc.ranges.changes.length > 0))
    ) {
      console.log(
        `Skipping doc ${doc.path} in project ${projectId} because it has ranges`
      )
      return
    }
    console.log(
      `Converting doc ${doc.path} in project ${projectId} to binary (${size} bytes)`
    )
    if (opts.commit) {
      const fileRef = await sendDocToFilestore(projectId, doc)
      await ProjectEntityMongoUpdateHandler.promises.replaceDocWithFile(
        ObjectId(projectId),
        ObjectId(docId),
        fileRef
      )
      await deleteDocFromMongo(projectId, doc)
      await deleteDocFromRedis(projectId, docId)
    }
  }
}

async function getDocIds(projectId) {
  const docIds = await redis.smembers(`DocsIn:{${projectId}}`)
  return docIds
}

async function getDocFromRedis(docId) {
  const lines = await redis.get(`doclines:{${docId}}`)
  const path = await redis.get(`Pathname:{${docId}}`)
  const ranges = await redis.get(`Ranges:{${docId}}`)
  // Also fetch the doc version; sendDocToFilestore uses it for the file rev
  const version = await redis.get(`DocVersion:{${docId}}`)
  return {
    id: docId,
    lines: JSON.parse(lines),
    ranges: ranges ? JSON.parse(ranges) : {},
    version: parseInt(version, 10),
    path,
  }
}

async function sendDocToFilestore(projectId, doc) {
  const basename = doc.path.split('/').pop()
  const tmpFilePath = await FileWriter.promises.writeLinesToDisk(
    projectId,
    doc.lines
  )
  try {
    const { fileRef } = await FileStoreHandler.promises.uploadFileFromDisk(
      projectId,
      { name: basename, rev: doc.version + 1 },
      tmpFilePath
    )
    return fileRef
  } finally {
    // Always remove the temporary file, even if the upload fails
    await fs.promises.unlink(tmpFilePath)
  }
}

async function deleteDocFromMongo(projectId, doc) {
  const basename = doc.path.split('/').pop()
  const deletedAt = new Date()
  await DocstoreManager.promises.deleteDoc(
    projectId,
    doc.id,
    basename,
    deletedAt
  )
}

async function deleteDocFromRedis(projectId, docId) {
  await redis.del(
    `Blocking:{${docId}}`,
    `doclines:{${docId}}`,
    `DocOps:{${docId}}`,
    `DocVersion:{${docId}}`,
    `DocHash:{${docId}}`,
    `ProjectId:{${docId}}`,
    `Ranges:{${docId}}`,
    `UnflushedTime:{${docId}}`,
    `Pathname:{${docId}}`,
    `ProjectHistoryId:{${docId}}`,
    `ProjectHistoryType:{${docId}}`,
    `PendingUpdates:{${docId}}`,
    `lastUpdatedAt:{${docId}}`,
    `lastUpdatedBy:{${docId}}`
  )
  // Remove the doc id (not the project id) from the project's DocsIn set
  await redis.srem(`DocsIn:{${projectId}}`, docId)
}

main()
  .then(() => {
    process.exit(0)
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })
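
For reference, a typical invocation would be a dry run first, then a second run with --commit once the reported conversions look right. This sketch assumes the script is run with node from the services/web directory; the project id and the 1 MiB threshold below are purely illustrative:

    # Dry run: report which docs would be converted, without changing anything
    node scripts/fix_oversized_docs.js 5f3c9e1b2a00000000000000

    # Apply the conversion, treating docs larger than 1 MiB as oversized
    node scripts/fix_oversized_docs.js --commit --max-doc-size 1048576 5f3c9e1b2a00000000000000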