Merge pull request #8492 from overleaf/msm-history-migration-convert-large-docs

[web] Updated history upgrade scripts to move large docs to filestore

GitOrigin-RevId: 11ac9fc879ff03756e3ec41bb156e878c4223014
Miguel Serrano 2022-06-20 16:20:43 +02:00 committed by Copybot
parent 3a1bba07f7
commit e7c8329491
2 changed files with 53 additions and 1 deletion

scripts/history/HistoryUpgradeHelper.js

@@ -1,9 +1,12 @@
const { ReadPreference, ObjectId } = require('mongodb')
const { db } = require('../../app/src/infrastructure/mongodb')
const Settings = require('@overleaf/settings')
const ProjectHistoryHandler = require('../../app/src/Features/Project/ProjectHistoryHandler')
const HistoryManager = require('../../app/src/Features/History/HistoryManager')
const ProjectHistoryController = require('../../modules/admin-panel/app/src/ProjectHistoryController')
const ProjectEntityHandler = require('../../app/src/Features/Project/ProjectEntityHandler')
const ProjectEntityUpdateHandler = require('../../app/src/Features/Project/ProjectEntityUpdateHandler')
// Timestamp of the 'Enable history for SL in background' release
const ID_WHEN_FULL_PROJECT_HISTORY_ENABLED = '5a8d8a370000000000000000'
@@ -292,8 +295,45 @@ async function anyDocHistoryIndexExists(project) {
  )
}

async function convertLargeDocsToFile(projectId, userId) {
  const docs = await ProjectEntityHandler.promises.getAllDocs(projectId)
  let convertedDocCount = 0
  for (const doc of Object.values(docs)) {
    // the JSON-stringified length is a cheap upper bound on the doc's character count
    const sizeBound = JSON.stringify(doc.lines).length
    if (docIsTooLarge(sizeBound, doc.lines, Settings.max_doc_length)) {
      // doc is over the limit: replace it with a binary file in filestore
      await ProjectEntityUpdateHandler.promises.convertDocToFile(
        projectId,
        doc._id,
        userId
      )
      convertedDocCount++
    }
  }
  return convertedDocCount
}
// check whether the total size of the document in characters exceeds the
// maxDocLength.
//
// Copied from document-updater:
// https://github.com/overleaf/internal/blob/74adfbebda5f3c2c37d9937f0db5c4106ecde492/services/document-updater/app/js/Limits.js#L18
function docIsTooLarge(estimatedSize, lines, maxDocLength) {
  if (estimatedSize <= maxDocLength) {
    return false // definitely under the limit, no need to calculate the total size
  }
  // calculate the total size, bailing out early if the size limit is reached
  let size = 0
  for (const line of lines) {
    size += line.length + 1 // include the newline
    if (size > maxDocLength) return true
  }
  // since we didn't hit the limit in the loop, the document is within the allowed length
  return false
}
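
For intuition, two hypothetical calls (illustrative values only, not part of this commit) exercise both paths through the check:

  docIsTooLarge(8, ['hi', 'there'], 10)     // => false: the estimate 8 is already <= the 10-character limit
  docIsTooLarge(12, ['hello', 'world'], 10) // => true: (5+1) + (5+1) = 12 newline-inclusive characters > 10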

module.exports = {
  determineProjectHistoryType,
  getUpgradeFunctionForType,
  upgradeProject,
  convertLargeDocsToFile,
}
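
A minimal sketch of calling the new helper directly for a single project (both ids are hypothetical placeholders; the require paths assume the caller sits in the same directory as HistoryUpgradeHelper.js):

const { waitForDb } = require('../../app/src/infrastructure/mongodb')
const { convertLargeDocsToFile } = require('./HistoryUpgradeHelper')

async function main() {
  await waitForDb()
  // placeholder ids for illustration
  const projectId = '507f1f77bcf86cd799439011'
  const userId = '507f191e810c19729de860ea'
  const count = await convertLargeDocsToFile(projectId, userId)
  console.log(`converted ${count} large docs to binary files`)
}

main().catch(err => {
  console.error(err)
  process.exit(1)
})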

scripts/history/migrate_history.js

@@ -1,4 +1,4 @@
-const SCRIPT_VERSION = 3
+const SCRIPT_VERSION = 4
const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true'
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 100
@@ -16,11 +16,16 @@ process.env.MONGO_SOCKET_TIMEOUT =
const PROJECT_ID = process.env.PROJECT_ID
// User id is required to move large documents to filestore
const USER_ID = process.env.USER_ID
const CONVERT_LARGE_DOCS_TO_FILE = process.env.CONVERT_LARGE_DOCS_TO_FILE === 'true'

const { ObjectId, ReadPreference } = require('mongodb')
const { db, waitForDb } = require('../../app/src/infrastructure/mongodb')
const { promiseMapWithLimit } = require('../../app/src/util/promises')
const { batchedUpdate } = require('../helpers/batchedUpdate')
const ProjectHistoryController = require('../../modules/admin-panel/app/src/ProjectHistoryController')
const HistoryUpgradeHelper = require('./HistoryUpgradeHelper')

console.log({
  DRY_RUN,

@@ -110,6 +115,13 @@ async function doUpgradeForNoneWithConversion(project) {
  const projectIdString = project._id.toString()
  if (!DRY_RUN) {
    try {
      if (CONVERT_LARGE_DOCS_TO_FILE) {
        const convertedDocCount =
          await HistoryUpgradeHelper.convertLargeDocsToFile(
            projectIdString,
            USER_ID
          )
        console.log(
          `converted ${convertedDocCount} large docs to binary files for project ${projectIdString}`
        )
      }
      await ProjectHistoryController.migrateProjectHistory(projectIdString)
    } catch (err) {
      // if migrateProjectHistory fails, it cleans up by deleting
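
Putting it together, a conversion run might be launched like this (the user id is an illustrative placeholder, and this assumes the script's usual convention that DRY_RUN defaults to on, so it must be set to 'false' explicitly before any writes happen):

  USER_ID=507f191e810c19729de860ea CONVERT_LARGE_DOCS_TO_FILE=true DRY_RUN=false \
    node scripts/history/migrate_history.js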