overleaf/services/web/test/acceptance/src/DeleteOrphanedDocsOnlineCheckTests.js
Eric Mc Sween 5fc6d7dcb3 Merge pull request #5740 from overleaf/em-gcp-logging-web
Improve GCP logging for web

GitOrigin-RevId: b304c87a3fe46c29189f665eb3daf22c23d6eb8f
2021-11-11 09:03:09 +00:00

265 lines
11 KiB
JavaScript

const { exec } = require('child_process')
const { promisify } = require('util')
const { expect } = require('chai')
const logger = require('@overleaf/logger')
const { filterOutput } = require('./helpers/settings')
const { db, ObjectId } = require('../../../app/src/infrastructure/mongodb')
// Number of seconds in one day; handed to the script as INCREMENT_BY_S.
const ONE_DAY_IN_S = 24 * 60 * 60
// Handed to the script as its BATCH_SIZE option.
const BATCH_SIZE = 3
/**
 * Read the timestamp carried by an id and express it in seconds.
 *
 * @param {{getTimestamp: function(): Date}} id - value whose `getTimestamp()`
 *   yields a Date
 * @returns {number} the timestamp in seconds since the Unix epoch
 */
function getSecondsFromObjectId(id) {
  const createdAt = id.getTimestamp()
  const millis = createdAt.getTime()
  return millis / 1000
}
/**
 * Build an ObjectId for a given point in time.
 *
 * @param {string|number|Date} date - anything accepted by `new Date(...)`
 * @returns {ObjectId} result of `ObjectId.createFromTime` for that time,
 *   expressed in seconds
 */
function getObjectIdFromDate(date) {
  const millis = new Date(date).getTime()
  return ObjectId.createFromTime(millis / 1000)
}
describe('DeleteOrphanedDocsOnlineCheck', function () {
let docIds
let projectIds
let stopAtSeconds
let BATCH_LAST_ID
beforeEach('create docs', async function () {
  // Every fixture id is an ObjectId derived from an ISO timestamp.
  const idAt = timestamp => getObjectIdFromDate(timestamp)

  // Starting point handed to the script; earlier than every doc id below.
  BATCH_LAST_ID = idAt('2021-03-31T00:00:00.000Z')

  docIds = [
    '2021-04-01T00:00:00.000Z',
    '2021-04-02T00:00:00.000Z',
    '2021-04-11T00:00:00.000Z',
    '2021-04-12T00:00:00.000Z',
    '2021-04-13T00:00:00.000Z',
    '2021-04-14T00:00:00.000Z',
    '2021-04-15T00:00:00.000Z',
    '2021-04-16T00:01:00.000Z',
    '2021-04-16T00:02:00.000Z',
    '2021-04-16T00:03:00.000Z',
    '2021-04-16T00:04:00.000Z',
    '2021-04-16T00:05:00.000Z',
  ].map(idAt)

  projectIds = [
    '2021-04-01T00:00:00.000Z',
    '2021-04-02T00:00:00.000Z',
    '2021-04-11T00:00:00.000Z',
    '2021-04-12T00:00:00.000Z',
    '2021-04-13T00:00:00.000Z',
    '2021-04-14T00:00:00.000Z',
    '2021-04-15T00:00:00.000Z',
    '2021-04-16T00:01:00.000Z',
    '2021-04-16T00:02:00.000Z',
    '2021-04-16T00:03:00.000Z',
  ].map(idAt)
  // Slots 10 and 11 reuse existing projects, so projects 9 and 4 each end up
  // with two docs.
  projectIds.push(projectIds[9], projectIds[4])

  stopAtSeconds = Date.parse('2021-04-17T00:00:00.000Z') / 1000
})
beforeEach('create doc stubs', async function () {
  // Doc number `index` is attached to project number `index`.
  const docStub = index => ({
    _id: docIds[index],
    project_id: projectIds[index],
  })

  const stubs = [
    // orphaned
    docStub(0),
    docStub(1),
    docStub(2),
    docStub(3),
    // orphaned, failed hard deletion
    docStub(4),
    // not orphaned, live
    docStub(5),
    // not orphaned, pending hard deletion
    docStub(6),
    // multiple in a single batch
    docStub(7),
    docStub(8),
    docStub(9),
    // two docs in one project
    docStub(10),
    docStub(11),
  ]
  await db.docs.insertMany(stubs)
})
beforeEach('create project stubs', async function () {
  // Project 5 is the only one present in the projects collection (live).
  const liveProject = { _id: projectIds[5] }
  await db.projects.insertMany([liveProject])
})
beforeEach('create deleted project stubs', async function () {
  // hard-deleted: only the deleter data is left
  const hardDeleted = { deleterData: { deletedProjectId: projectIds[4] } }
  // soft-deleted: the project payload is still attached
  const softDeleted = {
    deleterData: { deletedProjectId: projectIds[6] },
    project: { _id: projectIds[6] },
  }
  await db.deletedProjects.insertMany([hardDeleted, softDeleted])
})
let options
/**
 * Run scripts/delete_orphaned_docs_online_check.js in a child process and
 * assert on the exact lines it writes to stdout and stderr.
 *
 * @param {boolean} dryRun - forwarded to the script as DRY_RUN
 * @returns {Promise<void>} rejects when the script fails or when its output
 *   differs from the expected lines
 */
async function runScript(dryRun) {
  options = {
    BATCH_LAST_ID,
    BATCH_SIZE,
    DRY_RUN: dryRun,
    INCREMENT_BY_S: ONE_DAY_IN_S,
    STOP_AT_S: stopAtSeconds,
    // Lower concurrency to 1 for strict sequence of log messages.
    READ_CONCURRENCY_SECONDARY: 1,
    READ_CONCURRENCY_PRIMARY: 1,
    WRITE_CONCURRENCY: 1,
    // start right away
    LET_USER_DOUBLE_CHECK_INPUTS_FOR: 1,
  }

  // The options travel as `KEY=value` assignments in front of the command.
  const command = [
    ...Object.entries(options).map(([name, setting]) => `${name}=${setting}`),
    // Hide verbose log messages `calling destroy for project in docstore`
    'LOG_LEVEL=error',
    // Hide deprecation warnings for calling `db.collection.count`
    'NODE_OPTIONS=--no-deprecation',
    'node',
    'scripts/delete_orphaned_docs_online_check.js',
  ].join(' ')

  const execAsync = promisify(exec)
  let result
  try {
    result = await execAsync(command)
  } catch (error) {
    // dump details like exit code, stdErr and stdOut
    logger.error({ error }, 'script failed')
    throw error
  }

  const stdErr = result.stderr.split('\n').filter(filterOutput)
  const stdOut = result.stdout.split('\n').filter(filterOutput)

  const secondsOneDayAfterProject9 =
    getSecondsFromObjectId(projectIds[9]) + ONE_DAY_IN_S
  const idOneDayAfterProject9 = getObjectIdFromDate(
    1000 * secondsOneDayAfterProject9
  )

  // Builders for the expected stdout lines.
  const quoteAll = ids => ids.map(id => `"${id}"`).join(',')
  const checking = (...ids) => `Checking projects [${quoteAll(ids)}]`
  const orphaned = (projectId, ...orphanIds) =>
    `Deleted project ${projectId} has ${
      orphanIds.length
    } orphaned docs: [${quoteAll(orphanIds)}]`

  expect(stdOut).to.deep.equal([
    checking(projectIds[0]),
    orphaned(projectIds[0], docIds[0]),
    checking(projectIds[1]),
    orphaned(projectIds[1], docIds[1]),
    checking(projectIds[2]),
    orphaned(projectIds[2], docIds[2]),
    checking(projectIds[3]),
    orphaned(projectIds[3], docIds[3]),
    // Two docs in the same project
    checking(projectIds[4]),
    orphaned(projectIds[4], docIds[4], docIds[11]),
    // Project 5 is live
    checking(projectIds[5]),
    // Project 6 is soft-deleted
    checking(projectIds[6]),
    // 7, 8 and 9 fall on the same day and are checked together
    // (BATCH_SIZE is 3)
    checking(projectIds[7], projectIds[8], projectIds[9]),
    orphaned(projectIds[7], docIds[7]),
    orphaned(projectIds[8], docIds[8]),
    // Two docs in the same project
    orphaned(projectIds[9], docIds[9], docIds[10]),
    '',
  ])

  // Builder for the expected progress lines on stderr.
  const processed = (projects, withOrphans, docs, until) =>
    `Processed ${projects} projects (${withOrphans} projects with orphaned docs/${docs} docs deleted) until ${until}`

  // One progress line per daily step:
  // [step end, projects, projects with orphaned docs, docs]
  const dailyProgress = [
    ['2021-04-01T00:00:00.000Z', 1, 1, 1],
    ['2021-04-02T00:00:00.000Z', 2, 2, 2],
    ['2021-04-03T00:00:00.000Z', 2, 2, 2],
    ['2021-04-04T00:00:00.000Z', 2, 2, 2],
    ['2021-04-05T00:00:00.000Z', 2, 2, 2],
    ['2021-04-06T00:00:00.000Z', 2, 2, 2],
    ['2021-04-07T00:00:00.000Z', 2, 2, 2],
    ['2021-04-08T00:00:00.000Z', 2, 2, 2],
    ['2021-04-09T00:00:00.000Z', 2, 2, 2],
    ['2021-04-10T00:00:00.000Z', 2, 2, 2],
    ['2021-04-11T00:00:00.000Z', 3, 3, 3],
    ['2021-04-12T00:00:00.000Z', 4, 4, 4],
    ['2021-04-13T00:00:00.000Z', 5, 5, 6],
    ['2021-04-14T00:00:00.000Z', 6, 5, 6],
    ['2021-04-15T00:00:00.000Z', 7, 5, 6],
    ['2021-04-16T00:00:00.000Z', 7, 5, 6],
  ].map(([date, projects, withOrphans, docs]) =>
    processed(projects, withOrphans, docs, getObjectIdFromDate(date))
  )

  expect(stdErr).to.deep.equal([
    ...`Options: ${JSON.stringify(options, null, 2)}`.split('\n'),
    'Waiting for you to double check inputs for 1 ms',
    ...dailyProgress,
    // 7,8,9,10 are on the same day, but exceed the batch size of 3
    // Project 9 has two docs.
    processed(10, 8, 10, projectIds[9]),
    // 10 has already been processed as part of the last batch -- same project_id as 9.
    processed(10, 8, 10, idOneDayAfterProject9),
    'Done.',
    '',
  ])
}
describe('DRY_RUN=true', function () {
  beforeEach('run script', async function () {
    const dryRun = true
    await runScript(dryRun)
  })

  it('should leave docs as is', async function () {
    const storedDocs = await db.docs.find({}).toArray()
    // Every inserted stub (doc i in project i) must still be present.
    const allStubs = docIds.map((_id, index) => ({
      _id,
      project_id: projectIds[index],
    }))
    expect(storedDocs).to.deep.equal(allStubs)
  })
})
describe('DRY_RUN=false', function () {
  beforeEach('run script', async function () {
    const dryRun = false
    await runScript(dryRun)
  })

  it('should deleted all but docs from live/soft-deleted projects', async function () {
    const storedDocs = await db.docs.find({}).toArray()
    // not orphaned, live
    const liveDoc = { _id: docIds[5], project_id: projectIds[5] }
    // not orphaned, pending hard deletion
    const softDeletedDoc = { _id: docIds[6], project_id: projectIds[6] }
    expect(storedDocs).to.deep.equal([liveDoc, softDeletedDoc])
  })
})
})