Merge pull request #14198 from overleaf/jpa-history-migration

[server-pro] prepare hotfix 3.5.11

GitOrigin-RevId: c4950a327e472c9cfe1af93599dd5a9a54bb956b
Jakob Ackermann 2023-08-10 17:19:23 +02:00 committed by Copybot
parent a3e30c0a39
commit a2322f090a
6 changed files with 317 additions and 0 deletions

View file

@@ -0,0 +1,8 @@
FROM sharelatex/sharelatex:3.5.10
# Patch: Drop the old history collections and increase mongo query timeout
ADD clean_sl_history_data.js /overleaf/services/web/scripts/history/clean_sl_history_data.js
# Patch: convert large deleted docs to files
COPY pr_14200.patch .
RUN patch -p0 < pr_14200.patch
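
The script added by the ADD line is not executed during the image build; it is meant to be run manually once every project has been migrated to Full Project History. As a small illustration of the timeout override it starts with, the snippet below shows the same fallback pattern and the resulting value in minutes (the in-container path comes from the ADD destination above, while the container name and exact invocation in the comment are assumptions):

// Hypothetical invocation from the Docker host, assuming the container is named "sharelatex":
//   docker exec sharelatex node /overleaf/services/web/scripts/history/clean_sl_history_data.js
//
// The script only sets MONGO_SOCKET_TIMEOUT when no value is already present in the
// environment, so an explicit setting always overrides the fallback on its first lines.
const timeoutMs = parseInt(process.env.MONGO_SOCKET_TIMEOUT || '3600000', 10)
console.log(`mongo socket timeout: ${timeoutMs} ms (~${timeoutMs / 60000} minutes)`)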

View file

@@ -0,0 +1,70 @@
// Increase default mongo query timeout from 1min to 1h
process.env.MONGO_SOCKET_TIMEOUT = process.env.MONGO_SOCKET_TIMEOUT || '3600000'
const { waitForDb, db } = require('../../app/src/infrastructure/mongodb')
async function main() {
await checkAllProjectsAreMigrated()
await setAllowDowngradeToFalse()
await deleteHistoryCollections()
console.log('Legacy history data cleaned up successfully')
process.exit(0)
}
async function checkAllProjectsAreMigrated() {
console.log('checking all projects are migrated to Full Project History')
const count = await db.projects.countDocuments({
'overleaf.history.display': { $ne: true },
})
if (count === 0) {
console.log('All projects are migrated to Full Project History')
} else {
console.error(
`There are ${count} projects that are not migrated to Full Project History.` +
` Please complete the migration before running this script again.`
)
process.exit(1)
}
}
async function setAllowDowngradeToFalse() {
console.log('unsetting `allowDowngrade` flag in all projects')
await db.projects.updateMany(
{
'overleaf.history.id': { $exists: true },
'overleaf.history.allowDowngrade': true,
},
{ $unset: { 'overleaf.history.allowDowngrade': 1 } }
)
console.log('unsetting `allowDowngrade` flag in all projects - Done')
}
async function deleteHistoryCollections() {
await gracefullyDropCollection(db.docHistory)
await gracefullyDropCollection(db.docHistoryIndex)
await gracefullyDropCollection(db.projectHistoryMetaData)
}
async function gracefullyDropCollection(collection) {
const collectionName = collection.collectionName
console.log(`removing \`${collectionName}\` data`)
try {
await collection.drop()
} catch (err) {
if (err.code === 26) {
// error code 26 = NamespaceNotFound: the collection does not exist (already removed)
console.log(`removing \`${collectionName}\` data - Already removed`)
} else {
throw err
}
}
console.log(`removing \`${collectionName}\` data - Done`)
}
waitForDb()
.then(main)
.catch(err => {
console.error(err)
process.exit(1)
})
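
Because the script unsets project flags and drops collections irreversibly, an operator may want a read-only preview of what it would touch first. The sketch below is not part of this commit; it is a hypothetical companion that assumes only the same waitForDb/db helper and collection handles the script itself uses, placed alongside it so the relative require resolves the same way:

const { waitForDb, db } = require('../../app/src/infrastructure/mongodb')

async function report() {
  // projects still on the legacy history system (same filter as checkAllProjectsAreMigrated)
  const unmigrated = await db.projects.countDocuments({
    'overleaf.history.display': { $ne: true },
  })
  // projects that still carry the allowDowngrade flag (same filter as setAllowDowngradeToFalse)
  const downgradable = await db.projects.countDocuments({
    'overleaf.history.id': { $exists: true },
    'overleaf.history.allowDowngrade': true,
  })
  console.log(`projects not yet migrated: ${unmigrated}`)
  console.log(`projects with allowDowngrade set: ${downgradable}`)
  // approximate size of the legacy collections that deleteHistoryCollections would drop
  for (const collection of [
    db.docHistory,
    db.docHistoryIndex,
    db.projectHistoryMetaData,
  ]) {
    const count = await collection.estimatedDocumentCount()
    console.log(`${collection.collectionName}: ~${count} documents`)
  }
  process.exit(0)
}

waitForDb()
  .then(report)
  .catch(err => {
    console.error(err)
    process.exit(1)
  })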

View file

@@ -0,0 +1,95 @@
--- services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js
+++ services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js
@@ -1,6 +1,9 @@
+const _ = require('lodash')
+const fs = require('fs')
const { ReadPreference, ObjectId } = require('mongodb')
const { db } = require('../../../../app/src/infrastructure/mongodb')
const Settings = require('@overleaf/settings')
+const logger = require('@overleaf/logger')
const ProjectHistoryHandler = require('../../../../app/src/Features/Project/ProjectHistoryHandler')
const HistoryManager = require('../../../../app/src/Features/History/HistoryManager')
@@ -8,6 +11,8 @@ const ProjectHistoryController = require('./ProjectHistoryController')
const ProjectEntityHandler = require('../../../../app/src/Features/Project/ProjectEntityHandler')
const ProjectEntityUpdateHandler = require('../../../../app/src/Features/Project/ProjectEntityUpdateHandler')
const DocumentUpdaterHandler = require('../../../../app/src/Features/DocumentUpdater/DocumentUpdaterHandler')
+const { Doc } = require('../../../../app/src/models/Doc')
+const FileWriter = require('../../../../app/src/infrastructure/FileWriter')
// Timestamp of the 'Enable history for SL in background' release
const ID_WHEN_FULL_PROJECT_HISTORY_ENABLED =
@@ -340,9 +345,33 @@ async function anyDocHistoryIndexExists(project) {
)
}
+async function convertDeletedDocToFile(projectId, docId, userId, source, doc) {
+ // write the doc to a temporary file and upload to filestore
+ const tmpFilePath = await FileWriter.promises.writeLinesToDisk(
+ projectId,
+ doc.lines
+ )
+ await ProjectEntityUpdateHandler.promises.upsertFileWithPath(
+ projectId,
+ `/_deleted/${docId}/${doc.name}`,
+ tmpFilePath,
+ null,
+ userId,
+ source
+ )
+ // hard delete the original doc, otherwise it will get picked up again
+ // by readDeletedDocs in ProjectHistoryController and the final
+ // resync of the history will fail.
+ await db.docs.deleteOne({ _id: docId })
+ await db.docOps.deleteOne({ doc_id: docId })
+ // clean up the temporary file
+ await fs.promises.unlink(tmpFilePath)
+}
+
async function convertLargeDocsToFile(projectId, userId) {
- const docs = await ProjectEntityHandler.promises.getAllDocs(projectId)
let convertedDocCount = 0
+ const docs = await ProjectEntityHandler.promises.getAllDocs(projectId)
+ // Convert large docs to files
for (const doc of Object.values(docs)) {
const sizeBound = JSON.stringify(doc.lines)
if (docIsTooLarge(sizeBound, doc.lines, Settings.max_doc_length)) {
@@ -355,6 +384,39 @@ async function convertLargeDocsToFile(projectId, userId) {
convertedDocCount++
}
}
+ // Convert deleted docs to files; these cannot be converted by
+ // ProjectEntityUpdateHandler, so we do it manually
+ const docsCursor = Doc.find({
+ project_id: ObjectId(projectId),
+ })
+ .lean()
+ .cursor()
+ for await (const doc of docsCursor) {
+ // check whether the doc is present in the filetree instead of
+ // relying on the deletedAt property
+ const docExistsInFiletree = _.find(docs, existingDoc =>
+ existingDoc._id.equals(doc._id)
+ )
+ if (docExistsInFiletree || doc.inS3) {
+ continue
+ }
+ const sizeBound = JSON.stringify(doc.lines)
+ if (docIsTooLarge(sizeBound, doc.lines, Settings.max_doc_length)) {
+ const docId = doc._id.toString()
+ if (!_.isEmpty(doc.ranges)) {
+ throw new Error(`found too large deleted doc with ranges: ${docId}`)
+ }
+ logger.warn({ projectId, docId }, 'converting large deleted doc')
+ await convertDeletedDocToFile(
+ projectId,
+ doc._id,
+ userId,
+ 'history-migration',
+ doc
+ )
+ convertedDocCount++
+ }
+ }
return convertedDocCount
}
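
docIsTooLarge is defined elsewhere in HistoryUpgradeHelper and is not part of this diff. Purely as an illustration, the call sites above (which pass the JSON-serialized lines, the lines array, and Settings.max_doc_length) are consistent with a check of roughly the following shape; the structure here is an assumption, not the actual implementation:

// Illustrative sketch only, not the check used by HistoryUpgradeHelper.
// estimatedSize is the JSON-serialized lines (the sizeBound passed above),
// lines is the array of document lines, maxDocLength is Settings.max_doc_length.
function docIsTooLargeSketch(estimatedSize, lines, maxDocLength) {
  // cheap upper bound first: the serialized form is always at least as long as the
  // raw text, so a short serialized form cannot exceed the limit
  if (estimatedSize.length <= maxDocLength) {
    return false
  }
  // otherwise count the actual characters, including one newline per line
  let totalLength = 0
  for (const line of lines) {
    totalLength += line.length + 1
  }
  return totalLength > maxDocLength
}

Too-large deleted docs that still carry tracked-changes ranges are rejected via the throw above rather than converted, presumably because turning them into a plain file would silently discard the ranges data.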

View file

@@ -0,0 +1,4 @@
FROM sharelatex/sharelatex:4.0.5
# Patch: Drop the old history collections and increase mongo query timeout
ADD clean_sl_history_data.js /overleaf/services/web/scripts/history/clean_sl_history_data.js

View file

@@ -0,0 +1,70 @@
// Increase default mongo query timeout from 1min to 1h
process.env.MONGO_SOCKET_TIMEOUT = process.env.MONGO_SOCKET_TIMEOUT || '3600000'
const { waitForDb, db } = require('../../app/src/infrastructure/mongodb')
async function main() {
await checkAllProjectsAreMigrated()
await setAllowDowngradeToFalse()
await deleteHistoryCollections()
console.log('Legacy history data cleaned up successfully')
process.exit(0)
}
async function checkAllProjectsAreMigrated() {
console.log('checking all projects are migrated to Full Project History')
const count = await db.projects.countDocuments({
'overleaf.history.display': { $ne: true },
})
if (count === 0) {
console.log('All projects are migrated to Full Project History')
} else {
console.error(
`There are ${count} projects that are not migrated to Full Project History.` +
` Please complete the migration before running this script again.`
)
process.exit(1)
}
}
async function setAllowDowngradeToFalse() {
console.log('unsetting `allowDowngrade` flag in all projects')
await db.projects.updateMany(
{
'overleaf.history.id': { $exists: true },
'overleaf.history.allowDowngrade': true,
},
{ $unset: { 'overleaf.history.allowDowngrade': 1 } }
)
console.log('unsetting `allowDowngrade` flag in all projects - Done')
}
async function deleteHistoryCollections() {
await gracefullyDropCollection(db.docHistory)
await gracefullyDropCollection(db.docHistoryIndex)
await gracefullyDropCollection(db.projectHistoryMetaData)
}
async function gracefullyDropCollection(collection) {
const collectionName = collection.collectionName
console.log(`removing \`${collectionName}\` data`)
try {
await collection.drop()
} catch (err) {
if (err.code === 26) {
// error code 26 = NamespaceNotFound: the collection does not exist (already removed)
console.log(`removing \`${collectionName}\` data - Already removed`)
} else {
throw err
}
}
console.log(`removing \`${collectionName}\` data - Done`)
}
waitForDb()
.then(main)
.catch(err => {
console.error(err)
process.exit(1)
})

View file

@@ -0,0 +1,70 @@
// Increase default mongo query timeout from 1min to 1h
process.env.MONGO_SOCKET_TIMEOUT = process.env.MONGO_SOCKET_TIMEOUT || '3600000'
const { waitForDb, db } = require('../../app/src/infrastructure/mongodb')
async function main() {
await checkAllProjectsAreMigrated()
await setAllowDowngradeToFalse()
await deleteHistoryCollections()
console.log('Legacy history data cleaned up successfully')
process.exit(0)
}
async function checkAllProjectsAreMigrated() {
console.log('checking all projects are migrated to Full Project History')
const count = await db.projects.countDocuments({
'overleaf.history.display': { $ne: true },
})
if (count === 0) {
console.log('All projects are migrated to Full Project History')
} else {
console.error(
`There are ${count} projects that are not migrated to Full Project History.` +
` Please complete the migration before running this script again.`
)
process.exit(1)
}
}
async function setAllowDowngradeToFalse() {
console.log('unsetting `allowDowngrade` flag in all projects')
await db.projects.updateMany(
{
'overleaf.history.id': { $exists: true },
'overleaf.history.allowDowngrade': true,
},
{ $unset: { 'overleaf.history.allowDowngrade': 1 } }
)
console.log('unsetting `allowDowngrade` flag in all projects - Done')
}
async function deleteHistoryCollections() {
await gracefullyDropCollection(db.docHistory)
await gracefullyDropCollection(db.docHistoryIndex)
await gracefullyDropCollection(db.projectHistoryMetaData)
}
async function gracefullyDropCollection(collection) {
const collectionName = collection.collectionName
console.log(`removing \`${collectionName}\` data`)
try {
await collection.drop()
} catch (err) {
if (err.code === 26) {
// error code 26 = NamespaceNotFound: the collection does not exist (already removed)
console.log(`removing \`${collectionName}\` data - Already removed`)
} else {
throw err
}
}
console.log(`removing \`${collectionName}\` data - Done`)
}
waitForDb()
.then(main)
.catch(err => {
console.error(err)
process.exit(1)
})