mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-29 18:43:42 -05:00
Merge pull request #7579 from overleaf/jpa-refactor-orphaned-data-helper
[web] de-duplicate logic for getting a list of hard-deleted project ids GitOrigin-RevId: daf2ff427e24f9ef9253e4bc9ff52f53196fd854
This commit is contained in:
parent
f1f9771150
commit
0120268c57
3 changed files with 125 additions and 191 deletions
|
@ -13,11 +13,11 @@ process.env.BATCH_SIZE = BATCH_SIZE
|
||||||
process.env.MONGO_SOCKET_TIMEOUT =
|
process.env.MONGO_SOCKET_TIMEOUT =
|
||||||
parseInt(process.env.MONGO_SOCKET_TIMEOUT, 10) || 600000
|
parseInt(process.env.MONGO_SOCKET_TIMEOUT, 10) || 600000
|
||||||
|
|
||||||
const { ObjectId, ReadPreference } = require('mongodb')
|
const { ObjectId } = require('mongodb')
|
||||||
const { db } = require('../app/src/infrastructure/mongodb')
|
|
||||||
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||||
const { batchedUpdate } = require('./helpers/batchedUpdate')
|
const { batchedUpdate } = require('./helpers/batchedUpdate')
|
||||||
const ChatApiHandler = require('../app/src/Features/Chat/ChatApiHandler')
|
const ChatApiHandler = require('../app/src/Features/Chat/ChatApiHandler')
|
||||||
|
const { getHardDeletedProjectIds } = require('./delete_orphaned_data_helper')
|
||||||
|
|
||||||
console.log({
|
console.log({
|
||||||
DRY_RUN,
|
DRY_RUN,
|
||||||
|
@ -36,11 +36,6 @@ async function processBatch(_, rooms) {
|
||||||
if (rooms.length && rooms[0]._id) {
|
if (rooms.length && rooms[0]._id) {
|
||||||
RESULT.continueFrom = rooms[0]._id
|
RESULT.continueFrom = rooms[0]._id
|
||||||
}
|
}
|
||||||
|
|
||||||
// Logic taken from delete_orphaned_docs_online_check.js
|
|
||||||
// gets projectIds from rooms,
|
|
||||||
// then checks 'expired' status of project
|
|
||||||
|
|
||||||
const projectIds = Array.from(
|
const projectIds = Array.from(
|
||||||
new Set(rooms.map(room => room.project_id.toString()))
|
new Set(rooms.map(room => room.project_id.toString()))
|
||||||
).map(ObjectId)
|
).map(ObjectId)
|
||||||
|
@ -49,35 +44,11 @@ async function processBatch(_, rooms) {
|
||||||
JSON.stringify(projectIds)
|
JSON.stringify(projectIds)
|
||||||
)
|
)
|
||||||
|
|
||||||
const doubleCheckProjectIdsOnPrimary = []
|
const projectsWithOrphanedChat = await getHardDeletedProjectIds({
|
||||||
async function checkProjectOnSecondary(projectId) {
|
|
||||||
if (await checkProjectExistsOnSecondary(projectId)) {
|
|
||||||
// Finding a project with secondary confidence is sufficient.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// At this point, the secondaries deem this project as having orphaned chat.
|
|
||||||
doubleCheckProjectIdsOnPrimary.push(projectId)
|
|
||||||
}
|
|
||||||
|
|
||||||
const projectsWithOrphanedChat = []
|
|
||||||
async function checkProjectOnPrimary(projectId) {
|
|
||||||
if (await checkProjectExistsOnPrimary(projectId)) {
|
|
||||||
// The project is actually live.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
projectsWithOrphanedChat.push(projectId)
|
|
||||||
}
|
|
||||||
|
|
||||||
await promiseMapWithLimit(
|
|
||||||
READ_CONCURRENCY_SECONDARY,
|
|
||||||
projectIds,
|
projectIds,
|
||||||
checkProjectOnSecondary
|
|
||||||
)
|
|
||||||
await promiseMapWithLimit(
|
|
||||||
READ_CONCURRENCY_PRIMARY,
|
READ_CONCURRENCY_PRIMARY,
|
||||||
doubleCheckProjectIdsOnPrimary,
|
READ_CONCURRENCY_SECONDARY,
|
||||||
checkProjectOnPrimary
|
})
|
||||||
)
|
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`Destroying chat for projects (${projectsWithOrphanedChat.length})`,
|
`Destroying chat for projects (${projectsWithOrphanedChat.length})`,
|
||||||
|
@ -104,72 +75,6 @@ async function processBatch(_, rooms) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getDeletedProject(projectId, readPreference) {
|
|
||||||
return await db.deletedProjects.findOne(
|
|
||||||
{ 'deleterData.deletedProjectId': projectId },
|
|
||||||
{
|
|
||||||
// There is no index on .project. Pull down something small.
|
|
||||||
projection: { 'project._id': 1 },
|
|
||||||
readPreference,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getProject(projectId, readPreference) {
|
|
||||||
return await db.projects.findOne(
|
|
||||||
{ _id: projectId },
|
|
||||||
{
|
|
||||||
// Pulling down an empty object is fine for differentiating with null.
|
|
||||||
projection: { _id: 0 },
|
|
||||||
readPreference,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
|
|
||||||
// NOTE: Possible race conditions!
|
|
||||||
// There are two processes which are racing with our queries:
|
|
||||||
// 1. project deletion
|
|
||||||
// 2. project restoring
|
|
||||||
// For 1. we check the projects collection before deletedProjects.
|
|
||||||
// If a project were to be delete in this very moment, we should see the
|
|
||||||
// soft-deleted entry which is created before deleting the projects entry.
|
|
||||||
// For 2. we check the projects collection after deletedProjects again.
|
|
||||||
// If a project were to be restored in this very moment, it is very likely
|
|
||||||
// to see the projects entry again.
|
|
||||||
// Unlikely edge case: Restore+Deletion in rapid succession.
|
|
||||||
// We could add locking to the ProjectDeleter for ruling ^ out.
|
|
||||||
if (await getProject(projectId, readPreference)) {
|
|
||||||
// The project is live.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
const deletedProject = await getDeletedProject(projectId, readPreference)
|
|
||||||
if (deletedProject && deletedProject.project) {
|
|
||||||
// The project is registered for hard-deletion.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if (await getProject(projectId, readPreference)) {
|
|
||||||
// The project was just restored.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// The project does not exist.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkProjectExistsOnPrimary(projectId) {
|
|
||||||
return await checkProjectExistsWithReadPreference(
|
|
||||||
projectId,
|
|
||||||
ReadPreference.PRIMARY
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkProjectExistsOnSecondary(projectId) {
|
|
||||||
return await checkProjectExistsWithReadPreference(
|
|
||||||
projectId,
|
|
||||||
ReadPreference.SECONDARY
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const projection = {
|
const projection = {
|
||||||
_id: 1,
|
_id: 1,
|
||||||
|
|
110
services/web/scripts/delete_orphaned_data_helper.js
Normal file
110
services/web/scripts/delete_orphaned_data_helper.js
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
const { ReadPreference } = require('mongodb')
|
||||||
|
const { db } = require('../app/src/infrastructure/mongodb')
|
||||||
|
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||||
|
|
||||||
|
/**
 * Fetch the `deletedProjects` record that belongs to a given project id.
 *
 * @param {ObjectId} projectId - matched against `deleterData.deletedProjectId`
 *   (presumably an ObjectId; verify against callers)
 * @param {string} readPreference - MongoDB read preference used for the query
 * @return {Promise<Object|null>} whatever `findOne` yields for the query; only
 *   `project._id` is requested from the record
 */
async function getDeletedProject(projectId, readPreference) {
  const query = { 'deleterData.deletedProjectId': projectId }
  const options = {
    // `.project` is not indexed, so it cannot be part of the query.
    // Request just a tiny piece of it to learn whether it is still present.
    projection: { 'project._id': 1 },
    readPreference,
  }
  return await db.deletedProjects.findOne(query, options)
}
|
||||||
|
|
||||||
|
/**
 * Existence probe for a live project in the `projects` collection.
 *
 * @param {ObjectId} projectId - value matched against `_id`
 *   (presumably an ObjectId; verify against callers)
 * @param {string} readPreference - MongoDB read preference used for the query
 * @return {Promise<Object|null>} a minimal `{ _id }` document when the project
 *   exists; callers in this file only test the result for truthiness
 */
async function getProject(projectId, readPreference) {
  return await db.projects.findOne(
    { _id: projectId },
    {
      // Include only `_id`. An exclusion-only projection such as `{ _id: 0 }`
      // would return every other field, i.e. the whole project document.
      // A tiny non-null object is all that is needed to tell it from null.
      projection: { _id: 1 },
      readPreference,
    }
  )
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a project still exists, either live or soft-deleted,
 * using the given read preference for all queries.
 *
 * NOTE: Possible race conditions! Two processes race with these queries:
 *  1. project deletion
 *  2. project restoring
 * For 1. the projects collection is read before deletedProjects: if the
 *  project were to be deleted at this very moment, the soft-deleted entry
 *  (created before the projects entry is removed) should be visible.
 * For 2. the projects collection is read once more after deletedProjects: if
 *  the project were to be restored at this very moment, it is very likely
 *  that the projects entry is visible again.
 * Unlikely edge case: restore + deletion in rapid succession. Locking in the
 *  ProjectDeleter could rule that out.
 *
 * @param {ObjectId} projectId
 * @param {string} readPreference - MongoDB read preference for all lookups
 * @return {Promise<boolean>} false only when no lookup finds the project
 */
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
  const isLive = async () =>
    Boolean(await getProject(projectId, readPreference))

  // First look: the project is live.
  if (await isLive()) {
    return true
  }

  // A soft-deleted record that still carries `.project` means the project is
  // registered for hard-deletion.
  const softDeleted = await getDeletedProject(projectId, readPreference)
  const awaitingHardDeletion = Boolean(softDeleted && softDeleted.project)
  if (awaitingHardDeletion) {
    return true
  }

  // Second look: true when the project was just restored, otherwise the
  // project does not exist.
  return await isLive()
}
|
||||||
|
|
||||||
|
/**
 * Run the project existence check with `ReadPreference.PRIMARY`.
 *
 * @param {ObjectId} projectId
 * @return {Promise<boolean>}
 */
async function checkProjectExistsOnPrimary(projectId) {
  const existsOnPrimary = await checkProjectExistsWithReadPreference(
    projectId,
    ReadPreference.PRIMARY
  )
  return existsOnPrimary
}
|
||||||
|
|
||||||
|
/**
 * Run the project existence check with `ReadPreference.SECONDARY`.
 *
 * @param {ObjectId} projectId
 * @return {Promise<boolean>}
 */
async function checkProjectExistsOnSecondary(projectId) {
  const existsOnSecondary = await checkProjectExistsWithReadPreference(
    projectId,
    ReadPreference.SECONDARY
  )
  return existsOnSecondary
}
|
||||||
|
|
||||||
|
/**
 * Filter a list of project ids down to those that no longer exist, neither
 * as a live project nor as a soft-deleted one.
 *
 * Works in two passes: every id is checked with secondary read preference
 * first, and only the ids that look gone there are re-checked with primary
 * read preference before being reported.
 *
 * @param {Object} options
 * @param {Array<ObjectId>} options.projectIds - candidate project ids
 * @param {number} options.READ_CONCURRENCY_PRIMARY - concurrency limit handed
 *   to promiseMapWithLimit for the primary pass
 * @param {number} options.READ_CONCURRENCY_SECONDARY - concurrency limit
 *   handed to promiseMapWithLimit for the secondary pass
 * @return {Promise<Array<ObjectId>>} ids for which both passes found nothing
 */
async function getHardDeletedProjectIds({
  projectIds,
  READ_CONCURRENCY_PRIMARY,
  READ_CONCURRENCY_SECONDARY,
}) {
  // Pass 1: finding a project with secondary confidence is sufficient to
  // keep it. Everything else is queued for a double check.
  const missingOnSecondary = []
  await promiseMapWithLimit(
    READ_CONCURRENCY_SECONDARY,
    projectIds,
    async projectId => {
      const found = await checkProjectExistsOnSecondary(projectId)
      if (!found) {
        missingOnSecondary.push(projectId)
      }
    }
  )

  // Pass 2: the secondary pass deemed these projects gone. Confirm with
  // primary reads, as a project may actually be live.
  const confirmedMissing = []
  await promiseMapWithLimit(
    READ_CONCURRENCY_PRIMARY,
    missingOnSecondary,
    async projectId => {
      const found = await checkProjectExistsOnPrimary(projectId)
      if (!found) {
        confirmedMissing.push(projectId)
      }
    }
  )

  return confirmedMissing
}
|
||||||
|
|
||||||
|
// Public surface of this helper: only the two-pass lookup is exported, the
// individual existence checks stay private to this module.
module.exports = { getHardDeletedProjectIds }
|
|
@ -3,6 +3,7 @@ const { promisify } = require('util')
|
||||||
const { ObjectId, ReadPreference } = require('mongodb')
|
const { ObjectId, ReadPreference } = require('mongodb')
|
||||||
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
|
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
|
||||||
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
const { promiseMapWithLimit } = require('../app/src/util/promises')
|
||||||
|
const { getHardDeletedProjectIds } = require('./delete_orphaned_data_helper')
|
||||||
const sleep = promisify(setTimeout)
|
const sleep = promisify(setTimeout)
|
||||||
|
|
||||||
const NOW_IN_S = Date.now() / 1000
|
const NOW_IN_S = Date.now() / 1000
|
||||||
|
@ -90,28 +91,6 @@ async function main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getDeletedProject(projectId, readPreference) {
|
|
||||||
return await db.deletedProjects.findOne(
|
|
||||||
{ 'deleterData.deletedProjectId': projectId },
|
|
||||||
{
|
|
||||||
// There is no index on .project. Pull down something small.
|
|
||||||
projection: { 'project._id': 1 },
|
|
||||||
readPreference,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getProject(projectId, readPreference) {
|
|
||||||
return await db.projects.findOne(
|
|
||||||
{ _id: projectId },
|
|
||||||
{
|
|
||||||
// Pulling down an empty object is fine for differentiating with null.
|
|
||||||
projection: { _id: 0 },
|
|
||||||
readPreference,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getProjectDocs(projectId) {
|
async function getProjectDocs(projectId) {
|
||||||
return await db.docs
|
return await db.docs
|
||||||
.find(
|
.find(
|
||||||
|
@ -124,69 +103,15 @@ async function getProjectDocs(projectId) {
|
||||||
.toArray()
|
.toArray()
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
|
|
||||||
// NOTE: Possible race conditions!
|
|
||||||
// There are two processes which are racing with our queries:
|
|
||||||
// 1. project deletion
|
|
||||||
// 2. project restoring
|
|
||||||
// For 1. we check the projects collection before deletedProjects.
|
|
||||||
// If a project were to be delete in this very moment, we should see the
|
|
||||||
// soft-deleted entry which is created before deleting the projects entry.
|
|
||||||
// For 2. we check the projects collection after deletedProjects again.
|
|
||||||
// If a project were to be restored in this very moment, it is very likely
|
|
||||||
// to see the projects entry again.
|
|
||||||
// Unlikely edge case: Restore+Deletion in rapid succession.
|
|
||||||
// We could add locking to the ProjectDeleter for ruling ^ out.
|
|
||||||
if (await getProject(projectId, readPreference)) {
|
|
||||||
// The project is live.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
const deletedProject = await getDeletedProject(projectId, readPreference)
|
|
||||||
if (deletedProject && deletedProject.project) {
|
|
||||||
// The project is registered for hard-deletion.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if (await getProject(projectId, readPreference)) {
|
|
||||||
// The project was just restored.
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// The project does not exist.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkProjectExistsOnPrimary(projectId) {
|
|
||||||
return await checkProjectExistsWithReadPreference(
|
|
||||||
projectId,
|
|
||||||
ReadPreference.PRIMARY
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkProjectExistsOnSecondary(projectId) {
|
|
||||||
return await checkProjectExistsWithReadPreference(
|
|
||||||
projectId,
|
|
||||||
ReadPreference.SECONDARY
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async function processBatch(projectIds) {
|
async function processBatch(projectIds) {
|
||||||
const doubleCheckProjectIdsOnPrimary = []
|
const projectsWithOrphanedDocs = await getHardDeletedProjectIds({
|
||||||
let nDeletedDocs = 0
|
projectIds,
|
||||||
async function checkProjectOnSecondary(projectId) {
|
READ_CONCURRENCY_PRIMARY,
|
||||||
if (await checkProjectExistsOnSecondary(projectId)) {
|
READ_CONCURRENCY_SECONDARY,
|
||||||
// Finding a project with secondary confidence is sufficient.
|
})
|
||||||
return
|
|
||||||
}
|
|
||||||
// At this point, the secondaries deem this project as having orphaned docs.
|
|
||||||
doubleCheckProjectIdsOnPrimary.push(projectId)
|
|
||||||
}
|
|
||||||
|
|
||||||
const projectsWithOrphanedDocs = []
|
let nDeletedDocs = 0
|
||||||
async function checkProjectOnPrimary(projectId) {
|
async function countOrphanedDocs(projectId) {
|
||||||
if (await checkProjectExistsOnPrimary(projectId)) {
|
|
||||||
// The project is actually live.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
projectsWithOrphanedDocs.push(projectId)
|
|
||||||
const docs = await getProjectDocs(projectId)
|
const docs = await getProjectDocs(projectId)
|
||||||
nDeletedDocs += docs.length
|
nDeletedDocs += docs.length
|
||||||
console.log(
|
console.log(
|
||||||
|
@ -196,16 +121,10 @@ async function processBatch(projectIds) {
|
||||||
JSON.stringify(docs.map(doc => doc._id))
|
JSON.stringify(docs.map(doc => doc._id))
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
await promiseMapWithLimit(
|
|
||||||
READ_CONCURRENCY_SECONDARY,
|
|
||||||
projectIds,
|
|
||||||
checkProjectOnSecondary
|
|
||||||
)
|
|
||||||
await promiseMapWithLimit(
|
await promiseMapWithLimit(
|
||||||
READ_CONCURRENCY_PRIMARY,
|
READ_CONCURRENCY_PRIMARY,
|
||||||
doubleCheckProjectIdsOnPrimary,
|
projectsWithOrphanedDocs,
|
||||||
checkProjectOnPrimary
|
countOrphanedDocs
|
||||||
)
|
)
|
||||||
if (!DRY_RUN) {
|
if (!DRY_RUN) {
|
||||||
await promiseMapWithLimit(
|
await promiseMapWithLimit(
|
||||||
|
|
Loading…
Reference in a new issue