mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #7579 from overleaf/jpa-refactor-orphaned-data-helper
[web] de-duplicate logic for getting a list of hard deleted project ids GitOrigin-RevId: daf2ff427e24f9ef9253e4bc9ff52f53196fd854
This commit is contained in:
parent
f1f9771150
commit
0120268c57
3 changed files with 125 additions and 191 deletions
|
@ -13,11 +13,11 @@ process.env.BATCH_SIZE = BATCH_SIZE
|
|||
process.env.MONGO_SOCKET_TIMEOUT =
|
||||
parseInt(process.env.MONGO_SOCKET_TIMEOUT, 10) || 600000
|
||||
|
||||
const { ObjectId, ReadPreference } = require('mongodb')
|
||||
const { db } = require('../app/src/infrastructure/mongodb')
|
||||
const { ObjectId } = require('mongodb')
|
||||
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||
const { batchedUpdate } = require('./helpers/batchedUpdate')
|
||||
const ChatApiHandler = require('../app/src/Features/Chat/ChatApiHandler')
|
||||
const { getHardDeletedProjectIds } = require('./delete_orphaned_data_helper')
|
||||
|
||||
console.log({
|
||||
DRY_RUN,
|
||||
|
@ -36,11 +36,6 @@ async function processBatch(_, rooms) {
|
|||
if (rooms.length && rooms[0]._id) {
|
||||
RESULT.continueFrom = rooms[0]._id
|
||||
}
|
||||
|
||||
// Logic taken from delete_orphaned_docs_online_check.js
|
||||
// gets projectIds from rooms,
|
||||
// then checks 'expired' status of project
|
||||
|
||||
const projectIds = Array.from(
|
||||
new Set(rooms.map(room => room.project_id.toString()))
|
||||
).map(ObjectId)
|
||||
|
@ -49,35 +44,11 @@ async function processBatch(_, rooms) {
|
|||
JSON.stringify(projectIds)
|
||||
)
|
||||
|
||||
const doubleCheckProjectIdsOnPrimary = []
|
||||
async function checkProjectOnSecondary(projectId) {
|
||||
if (await checkProjectExistsOnSecondary(projectId)) {
|
||||
// Finding a project with secondary confidence is sufficient.
|
||||
return
|
||||
}
|
||||
// At this point, the secondaries deem this project as having orphaned chat.
|
||||
doubleCheckProjectIdsOnPrimary.push(projectId)
|
||||
}
|
||||
|
||||
const projectsWithOrphanedChat = []
|
||||
async function checkProjectOnPrimary(projectId) {
|
||||
if (await checkProjectExistsOnPrimary(projectId)) {
|
||||
// The project is actually live.
|
||||
return
|
||||
}
|
||||
projectsWithOrphanedChat.push(projectId)
|
||||
}
|
||||
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
const projectsWithOrphanedChat = await getHardDeletedProjectIds({
|
||||
projectIds,
|
||||
checkProjectOnSecondary
|
||||
)
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_PRIMARY,
|
||||
doubleCheckProjectIdsOnPrimary,
|
||||
checkProjectOnPrimary
|
||||
)
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
})
|
||||
|
||||
console.log(
|
||||
`Destroying chat for projects (${projectsWithOrphanedChat.length})`,
|
||||
|
@ -104,72 +75,6 @@ async function processBatch(_, rooms) {
|
|||
}
|
||||
}
|
||||
|
||||
async function getDeletedProject(projectId, readPreference) {
|
||||
return await db.deletedProjects.findOne(
|
||||
{ 'deleterData.deletedProjectId': projectId },
|
||||
{
|
||||
// There is no index on .project. Pull down something small.
|
||||
projection: { 'project._id': 1 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function getProject(projectId, readPreference) {
|
||||
return await db.projects.findOne(
|
||||
{ _id: projectId },
|
||||
{
|
||||
// Pulling down an empty object is fine for differentiating with null.
|
||||
projection: { _id: 0 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
|
||||
// NOTE: Possible race conditions!
|
||||
// There are two processes which are racing with our queries:
|
||||
// 1. project deletion
|
||||
// 2. project restoring
|
||||
// For 1. we check the projects collection before deletedProjects.
|
||||
// If a project were to be deleted in this very moment, we should see the
|
||||
// soft-deleted entry which is created before deleting the projects entry.
|
||||
// For 2. we check the projects collection after deletedProjects again.
|
||||
// If a project were to be restored in this very moment, it is very likely
|
||||
// to see the projects entry again.
|
||||
// Unlikely edge case: Restore+Deletion in rapid succession.
|
||||
// We could add locking to the ProjectDeleter for ruling ^ out.
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project is live.
|
||||
return true
|
||||
}
|
||||
const deletedProject = await getDeletedProject(projectId, readPreference)
|
||||
if (deletedProject && deletedProject.project) {
|
||||
// The project is registered for hard-deletion.
|
||||
return true
|
||||
}
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project was just restored.
|
||||
return true
|
||||
}
|
||||
// The project does not exist.
|
||||
return false
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnPrimary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.PRIMARY
|
||||
)
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnSecondary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.SECONDARY
|
||||
)
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const projection = {
|
||||
_id: 1,
|
||||
|
|
110
services/web/scripts/delete_orphaned_data_helper.js
Normal file
110
services/web/scripts/delete_orphaned_data_helper.js
Normal file
|
@ -0,0 +1,110 @@
|
|||
const { ReadPreference } = require('mongodb')
|
||||
const { db } = require('../app/src/infrastructure/mongodb')
|
||||
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||
|
||||
async function getDeletedProject(projectId, readPreference) {
|
||||
return await db.deletedProjects.findOne(
|
||||
{ 'deleterData.deletedProjectId': projectId },
|
||||
{
|
||||
// There is no index on .project. Pull down something small.
|
||||
projection: { 'project._id': 1 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function getProject(projectId, readPreference) {
|
||||
return await db.projects.findOne(
|
||||
{ _id: projectId },
|
||||
{
|
||||
// Pulling down an empty object is fine for differentiating with null.
|
||||
projection: { _id: 0 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
|
||||
// NOTE: Possible race conditions!
|
||||
// There are two processes which are racing with our queries:
|
||||
// 1. project deletion
|
||||
// 2. project restoring
|
||||
// For 1. we check the projects collection before deletedProjects.
|
||||
// If a project were to be deleted in this very moment, we should see the
|
||||
// soft-deleted entry which is created before deleting the projects entry.
|
||||
// For 2. we check the projects collection after deletedProjects again.
|
||||
// If a project were to be restored in this very moment, it is very likely
|
||||
// to see the projects entry again.
|
||||
// Unlikely edge case: Restore+Deletion in rapid succession.
|
||||
// We could add locking to the ProjectDeleter for ruling ^ out.
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project is live.
|
||||
return true
|
||||
}
|
||||
const deletedProject = await getDeletedProject(projectId, readPreference)
|
||||
if (deletedProject && deletedProject.project) {
|
||||
// The project is registered for hard-deletion.
|
||||
return true
|
||||
}
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project was just restored.
|
||||
return true
|
||||
}
|
||||
// The project does not exist.
|
||||
return false
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnPrimary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.PRIMARY
|
||||
)
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnSecondary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.SECONDARY
|
||||
)
|
||||
}
|
||||
|
||||
async function getHardDeletedProjectIds({
|
||||
projectIds,
|
||||
READ_CONCURRENCY_PRIMARY,
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
}) {
|
||||
const doubleCheckProjectIdsOnPrimary = []
|
||||
async function checkProjectOnSecondary(projectId) {
|
||||
if (await checkProjectExistsOnSecondary(projectId)) {
|
||||
// Finding a project with secondary confidence is sufficient.
|
||||
return
|
||||
}
|
||||
// At this point, the secondaries deem this project as having orphaned data.
|
||||
doubleCheckProjectIdsOnPrimary.push(projectId)
|
||||
}
|
||||
|
||||
const hardDeletedProjectIds = []
|
||||
async function checkProjectOnPrimary(projectId) {
|
||||
if (await checkProjectExistsOnPrimary(projectId)) {
|
||||
// The project is actually live.
|
||||
return
|
||||
}
|
||||
hardDeletedProjectIds.push(projectId)
|
||||
}
|
||||
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
projectIds,
|
||||
checkProjectOnSecondary
|
||||
)
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_PRIMARY,
|
||||
doubleCheckProjectIdsOnPrimary,
|
||||
checkProjectOnPrimary
|
||||
)
|
||||
return hardDeletedProjectIds
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
getHardDeletedProjectIds,
|
||||
}
|
|
@ -3,6 +3,7 @@ const { promisify } = require('util')
|
|||
const { ObjectId, ReadPreference } = require('mongodb')
|
||||
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
|
||||
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||
const { getHardDeletedProjectIds } = require('./delete_orphaned_data_helper')
|
||||
const sleep = promisify(setTimeout)
|
||||
|
||||
const NOW_IN_S = Date.now() / 1000
|
||||
|
@ -90,28 +91,6 @@ async function main() {
|
|||
}
|
||||
}
|
||||
|
||||
async function getDeletedProject(projectId, readPreference) {
|
||||
return await db.deletedProjects.findOne(
|
||||
{ 'deleterData.deletedProjectId': projectId },
|
||||
{
|
||||
// There is no index on .project. Pull down something small.
|
||||
projection: { 'project._id': 1 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function getProject(projectId, readPreference) {
|
||||
return await db.projects.findOne(
|
||||
{ _id: projectId },
|
||||
{
|
||||
// Pulling down an empty object is fine for differentiating with null.
|
||||
projection: { _id: 0 },
|
||||
readPreference,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function getProjectDocs(projectId) {
|
||||
return await db.docs
|
||||
.find(
|
||||
|
@ -124,69 +103,15 @@ async function getProjectDocs(projectId) {
|
|||
.toArray()
|
||||
}
|
||||
|
||||
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
|
||||
// NOTE: Possible race conditions!
|
||||
// There are two processes which are racing with our queries:
|
||||
// 1. project deletion
|
||||
// 2. project restoring
|
||||
// For 1. we check the projects collection before deletedProjects.
|
||||
// If a project were to be deleted in this very moment, we should see the
|
||||
// soft-deleted entry which is created before deleting the projects entry.
|
||||
// For 2. we check the projects collection after deletedProjects again.
|
||||
// If a project were to be restored in this very moment, it is very likely
|
||||
// to see the projects entry again.
|
||||
// Unlikely edge case: Restore+Deletion in rapid succession.
|
||||
// We could add locking to the ProjectDeleter for ruling ^ out.
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project is live.
|
||||
return true
|
||||
}
|
||||
const deletedProject = await getDeletedProject(projectId, readPreference)
|
||||
if (deletedProject && deletedProject.project) {
|
||||
// The project is registered for hard-deletion.
|
||||
return true
|
||||
}
|
||||
if (await getProject(projectId, readPreference)) {
|
||||
// The project was just restored.
|
||||
return true
|
||||
}
|
||||
// The project does not exist.
|
||||
return false
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnPrimary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.PRIMARY
|
||||
)
|
||||
}
|
||||
|
||||
async function checkProjectExistsOnSecondary(projectId) {
|
||||
return await checkProjectExistsWithReadPreference(
|
||||
projectId,
|
||||
ReadPreference.SECONDARY
|
||||
)
|
||||
}
|
||||
|
||||
async function processBatch(projectIds) {
|
||||
const doubleCheckProjectIdsOnPrimary = []
|
||||
let nDeletedDocs = 0
|
||||
async function checkProjectOnSecondary(projectId) {
|
||||
if (await checkProjectExistsOnSecondary(projectId)) {
|
||||
// Finding a project with secondary confidence is sufficient.
|
||||
return
|
||||
}
|
||||
// At this point, the secondaries deem this project as having orphaned docs.
|
||||
doubleCheckProjectIdsOnPrimary.push(projectId)
|
||||
}
|
||||
const projectsWithOrphanedDocs = await getHardDeletedProjectIds({
|
||||
projectIds,
|
||||
READ_CONCURRENCY_PRIMARY,
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
})
|
||||
|
||||
const projectsWithOrphanedDocs = []
|
||||
async function checkProjectOnPrimary(projectId) {
|
||||
if (await checkProjectExistsOnPrimary(projectId)) {
|
||||
// The project is actually live.
|
||||
return
|
||||
}
|
||||
projectsWithOrphanedDocs.push(projectId)
|
||||
let nDeletedDocs = 0
|
||||
async function countOrphanedDocs(projectId) {
|
||||
const docs = await getProjectDocs(projectId)
|
||||
nDeletedDocs += docs.length
|
||||
console.log(
|
||||
|
@ -196,16 +121,10 @@ async function processBatch(projectIds) {
|
|||
JSON.stringify(docs.map(doc => doc._id))
|
||||
)
|
||||
}
|
||||
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_SECONDARY,
|
||||
projectIds,
|
||||
checkProjectOnSecondary
|
||||
)
|
||||
await promiseMapWithLimit(
|
||||
READ_CONCURRENCY_PRIMARY,
|
||||
doubleCheckProjectIdsOnPrimary,
|
||||
checkProjectOnPrimary
|
||||
projectsWithOrphanedDocs,
|
||||
countOrphanedDocs
|
||||
)
|
||||
if (!DRY_RUN) {
|
||||
await promiseMapWithLimit(
|
||||
|
|
Loading…
Reference in a new issue