overleaf/services/web/scripts/remove_unconfirmed_emails.mjs
Antoine Clausse 98ea3664f2 Create script: remove_unconfirmed_emails.mjs (#23079)
* Create script: remove_unconfirmed_emails.mjs

* Update script remove_unconfirmed_emails.mjs after pairing

Co-authored-by: Rebeka <rebeka.dekany@overleaf.com>

* Add user counts. Add timing

* Revert previous changes: just remove the fully unconfirmed emails

* Add tests on scripts/remove_unconfirmed_emails.mjs

* Update audit log so `removedEmail` is a string and doesn't break the admin-panel audit logs list

* Update remove_unconfirmed_emails to have a `--generate` and a `--consume` mode

* Update tests on remove_unconfirmed_emails

* Add tests checking that `--consume` doesn't delete any email

* Update script so `--consume` checks that emails shouldn't be deleted again!

* Update CSV path to `/tmp/...`

* Add test cases: deleted users, deleted email, comma in email

---------

Co-authored-by: Rebeka <rebeka.dekany@overleaf.com>
GitOrigin-RevId: 8c60b56bcdfa33bc6143d66c32a5f430fb76f6d7
2025-02-18 09:05:10 +00:00

262 lines
7.2 KiB
JavaScript

// @ts-check
import minimist from 'minimist'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import UserAuditLogHandler from '../app/src/Features/User/UserAuditLogHandler.js'
import fs from 'node:fs/promises'
import * as csv from 'csv'
import { promisify } from 'node:util'
import _ from 'lodash'
const CSV_FILENAME = '/tmp/remove_unconfirmed_emails.csv'
/**
* @type {(records: string[][]) => Promise<string>}
*/
const stringifyAsync = promisify(csv.stringify)
/**
* @type {(csvString: string) => Promise<string[][]>}
*/
const parseAsync = promisify(csv.parse)
function usage() {
console.log('Usage: node remove_unconfirmed_emails.mjs')
console.log('Removes unconfirmed emails from users')
console.log('Options:')
console.log(
'' +
' --generate generate the CSV file (remove_unconfirmed_emails.csv) containing the emails to remove\n' +
' --consume consume the CSV file (remove_unconfirmed_emails.csv) and remove the emails (by default it is a dry-run)\n' +
' --commit apply the changes (to be used with --consume)\n'
)
process.exit(0)
}
const { generate, consume, commit, help } = minimist(process.argv.slice(2), {
boolean: ['generate', 'consume', 'commit', 'help'],
alias: { help: 'h' },
default: { generate: false, consume: false, commit: false },
})
async function generateCsvFile() {
console.time('generate_csv')
let processedUsersCount = 0
let skippedUnconfirmedPrimaries = 0
let totalEmailsToRemove = 0
let totalUsersInCsv = 0
const records = [['User ID', 'Email', 'Sign Up Date']]
await batchedUpdate(
db.users,
{
$and: [
{ emails: { $exists: true } },
{ emails: { $not: { $size: 0 } } },
// Warning: this also matches unconfirmed primary emails
{
emails: {
$elemMatch: {
$or: [{ confirmedAt: { $exists: false } }, { confirmedAt: null }],
},
},
},
],
},
async users => {
console.log('Process', users.length, 'users')
processedUsersCount += users.length
for (const user of users) {
const unconfirmedSecondaries = user.emails.filter(
email => !email.confirmedAt && email.email !== user.email
)
if (unconfirmedSecondaries.length === 0) {
// Users can have been selected because of their unconfirmed primary email
// we don't want to remove those
skippedUnconfirmedPrimaries++
continue
}
for (const email of unconfirmedSecondaries) {
records.push([
user._id.toString(),
email.email,
user.signUpDate.toISOString(),
])
}
totalUsersInCsv++
totalEmailsToRemove += unconfirmedSecondaries.length
}
},
{ _id: 1, signUpDate: 1, emails: 1, email: 1 }
)
const csvContent = await stringifyAsync(records)
await fs.writeFile(CSV_FILENAME, csvContent)
console.log()
console.log('Processed users:', processedUsersCount)
console.log()
console.log('Generated CSV file:', CSV_FILENAME)
console.log('Total emails in the CSV:', totalEmailsToRemove)
console.log('Total users in the CSV:', totalUsersInCsv)
console.log(
'Unconfirmed primary emails (skipped):',
skippedUnconfirmedPrimaries
)
console.log()
console.timeEnd('generate_csv')
console.log()
}
async function consumeCsvFile() {
console.time('consume_csv')
const csvContent = await fs.readFile(CSV_FILENAME, 'utf8')
const rows = await parseAsync(csvContent)
rows.shift() // Remove header row
const emailsByUserId = {}
for (const [userId, email] of rows) {
if (!emailsByUserId[userId]) {
emailsByUserId[userId] = []
}
emailsByUserId[userId].push(email)
}
const userIds = Object.keys(emailsByUserId)
let processedUsersCount = 0
let removedEmailsCount = 0
let totalModifiedUsersCount = 0
const skippedEmail = {
userNotFound: 0,
nowConfirmed: 0,
nowPrimary: 0,
nowRemoved: 0,
}
console.log('Total emails in the CSV:', rows.length)
console.log('Total users in the CSV:', userIds.length)
for (const userId of userIds) {
const emailsToRemove = emailsByUserId[userId]
const user = await db.users.findOne({ _id: new ObjectId(userId) })
if (!user) {
skippedEmail.userNotFound += emailsToRemove.length
continue
}
const emailsToRemoveNow = emailsToRemove.filter(email => {
const currentEmail = user.emails.find(e => e.email === email)
if (!currentEmail) {
skippedEmail.nowRemoved++
return false
}
if (currentEmail.confirmedAt) {
skippedEmail.nowConfirmed++
return false
}
if (currentEmail.email === user.email) {
skippedEmail.nowPrimary++
return false
}
return true
})
removedEmailsCount += emailsToRemoveNow.length
if (commit && emailsToRemoveNow.length > 0) {
for (const email of emailsToRemove) {
await UserAuditLogHandler.promises.addEntry(
userId,
'remove-email',
undefined,
undefined,
{
removedEmail: email,
script: true,
note: 'remove unconfirmed secondary emails',
}
)
}
const updated = await db.users.updateOne(
{ _id: new ObjectId(userId) },
{ $pull: { emails: { email: { $in: emailsToRemove } } } }
)
totalModifiedUsersCount += updated.modifiedCount
}
processedUsersCount++
if (processedUsersCount % 100 === 0) {
console.log('Processed', processedUsersCount, 'users')
}
}
console.log()
if (!commit) {
console.log('Dry-run, use --commit to apply changes')
console.log('This would be the result:')
console.log()
}
console.log('Total emails in the CSV:', rows.length)
console.log('Total users in the CSV:', userIds.length)
console.log('Total users processed:', processedUsersCount)
console.log('Total emails removed:', removedEmailsCount)
console.log('Skipped emails:', _.sum(Object.values(skippedEmail)))
console.log(' - User not found:', skippedEmail.userNotFound)
console.log(' - Email now confirmed:', skippedEmail.nowConfirmed)
console.log(' - Email now primary:', skippedEmail.nowPrimary)
console.log(' - Email now removed:', skippedEmail.nowRemoved)
console.log()
if (commit) {
console.log('Total users modified:', totalModifiedUsersCount)
} else {
console.log('Note: this was a dry-run. No changes were made.')
}
console.log()
console.timeEnd('consume_csv')
console.log()
}
async function main() {
if (help) {
return usage()
}
if (!generate && !consume) {
console.error('Error: Either --generate or --consume must be specified')
return usage()
}
if (generate && consume) {
console.error('Error: Cannot use both --generate and --consume together')
return usage()
}
if (commit && !consume) {
console.error('Error: --commit can only be used with --consume')
return usage()
}
if (generate) {
await generateCsvFile()
} else if (consume) {
await consumeCsvFile()
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}