overleaf/services/web/scripts/add_user_count_to_csv.mjs
Jakob Ackermann a7517eefcb Merge pull request #21427 from overleaf/jpa-populate-db-on-import
[web] populate db with collections on import, ahead of waitForDb() call

GitOrigin-RevId: 7eb4cd61c2052187acd9947d7060f54d9822d314
2024-11-01 09:05:15 +00:00

95 lines
3 KiB
JavaScript

// Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
// Looks up the number of users for each domain in the input csv file and adds
// columns for the number of users in the domain, subdomains, and total.
import fs from 'fs'
// https://github.com/import-js/eslint-plugin-import/issues/1810
// eslint-disable-next-line import/no-unresolved
import * as csv from 'csv/sync'
import minimist from 'minimist'
import UserGetter from '../app/src/Features/User/UserGetter.js'
import { db } from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
// Command-line interface definition: `--domain`/`-d` selects the CSV column
// that holds the email domain, `--output`/`-o` selects where the result is
// written, and `--help` is a boolean flag.
const cliOptions = {
  alias: { domain: 'd', output: 'o' },
  boolean: ['help'],
  default: { domain: 'Email domain', output: '/dev/stdout' },
  string: ['domain', 'output'],
}
// Skip the node binary and script path; parse only the user-supplied arguments.
const argv = minimist(process.argv.slice(2), cliOptions)
// Print usage and exit with a non-zero status when help is requested or when
// more than one positional argument (input file) is supplied.
if (argv.help || argv._.length > 1) {
  // The script is an ES module, so the documented command must use `.mjs`.
  console.error(`Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
Looks up the number of users for each domain in the input file and adds
columns for the number of users in the domain, subdomains, and total.
Options:
--domain name of the csv column containing the email domain (default: "Email domain")
--output output file (default: /dev/stdout)
`)
  process.exit(1)
}
// Load and parse the whole CSV up front. INPUT-FILE is optional in the usage
// text, so fall back to stdin when it is omitted (mirrors the /dev/stdout
// default for the output) instead of passing `undefined` to readFileSync.
const inputPath = argv._[0] ?? '/dev/stdin'
const input = fs.readFileSync(inputPath, 'utf8')
// `columns: true` makes each record an object keyed by the header row.
const records = csv.parse(input, { columns: true })
if (records.length === 0) {
  console.error('No records in input file')
  process.exit(1)
}
/**
 * Adds 'Domain Users', 'Subdomain Users' and 'Total Users' columns to every
 * parsed record and writes the resulting CSV (with header) to `argv.output`.
 *
 * Records are processed one at a time, in input order.
 *
 * @returns {Promise<void>}
 * @throws {Error} when a record has no value in the column named by
 *   `argv.domain` (e.g. a misspelled `--domain` option)
 */
async function main() {
  for (const record of records) {
    const domain = record[argv.domain]
    // Fail with an actionable message rather than an opaque TypeError from
    // the lookups when the configured column does not exist in the input.
    if (typeof domain !== 'string') {
      throw new Error(`Column "${argv.domain}" not found in input record`)
    }
    // getUserCount takes only the domain; it chooses its own projection.
    const { domainUserCount, subdomainUserCount } = await getUserCount(domain)
    record['Domain Users'] = domainUserCount
    record['Subdomain Users'] = subdomainUserCount
    record['Total Users'] = domainUserCount + subdomainUserCount
  }
  const output = csv.stringify(records, { header: true })
  fs.writeFileSync(argv.output, output)
}
/**
 * Counts the users found for a domain and, separately, for its subdomains.
 *
 * Both lookups request only `_id` because just the number of matches is used.
 * The two lookups do not depend on each other, so they are issued
 * concurrently rather than one after the other.
 *
 * @param {string} domain - hostname to look up, e.g. 'foo.edu'
 * @returns {Promise<{domainUserCount: number, subdomainUserCount: number}>}
 */
async function getUserCount(domain) {
  const [domainUsers, subdomainUsers] = await Promise.all([
    UserGetter.promises.getUsersByHostname(domain, { _id: 1 }),
    getUsersByHostnameWithSubdomain(domain, { _id: 1 }),
  ])
  return {
    domainUserCount: domainUsers.length,
    subdomainUserCount: subdomainUsers.length,
  }
}
/**
 * Finds users with an email on a subdomain of `domain`, excluding the domain
 * itself (for 'foo.edu', 'cs.foo.edu' matches but 'foo.edu' does not).
 *
 * @param {string} domain - parent hostname; surrounding whitespace is trimmed
 * @param {object} projection - projection passed to the `users` find call
 * @returns {Promise<Array<object>>} the matching user documents
 */
async function getUsersByHostnameWithSubdomain(domain, projection) {
  // Reverse the hostname so the lookup becomes a prefix match on the
  // `emails.reversedHostname` field.
  const hostnameBackwards = domain
    .trim()
    .split('')
    .reduceRight((acc, ch) => acc + ch, '')
  // Escape the hostname so '.' only matches a literal dot, then require one
  // more '.' after it so that only strict subdomains match.
  const subdomainPattern = `^${_.escapeRegExp(hostnameBackwards)}\\.`
  const cursor = db.users.find(
    {
      emails: { $exists: true },
      'emails.reversedHostname': { $regex: subdomainPattern },
    },
    { projection }
  )
  const users = await cursor.toArray()
  return users
}
// Script entry point: run main() and turn its outcome into an exit status.
try {
  await main()
  // Report completion on stderr: the default output is /dev/stdout, so a
  // status line on stdout would be appended to the generated CSV.
  console.error('Done')
  process.exit(0)
} catch (error) {
  console.error(error)
  process.exit(1)
}