mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
a7517eefcb
[web] populate db with collections on import, ahead of waitForDb() call GitOrigin-RevId: 7eb4cd61c2052187acd9947d7060f54d9822d314
95 lines
3 KiB
JavaScript
95 lines
3 KiB
JavaScript
// Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
|
|
// Looks up the number of users for each domain in the input csv file and adds
|
|
// columns for the number of users in the domain, subdomains, and total.
|
|
import fs from 'fs'
|
|
// https://github.com/import-js/eslint-plugin-import/issues/1810
|
|
// eslint-disable-next-line import/no-unresolved
|
|
import * as csv from 'csv/sync'
|
|
import minimist from 'minimist'
|
|
import UserGetter from '../app/src/Features/User/UserGetter.js'
|
|
import { db } from '../app/src/infrastructure/mongodb.js'
|
|
import _ from 'lodash'
|
|
|
|
// Command-line options.
//   --domain / -d : name of the csv column holding the email domain
//   --output / -o : path the resulting csv is written to
//   --help        : print usage and exit
const argv = minimist(process.argv.slice(2), {
  boolean: ['help'],
  string: ['domain', 'output'],
  alias: { domain: 'd', output: 'o' },
  default: { domain: 'Email domain', output: '/dev/stdout' },
})
|
|
|
|
// Print usage and bail out when help is requested or more than one input file
// is given (the script processes a single csv file).
if (argv.help || argv._.length > 1) {
  console.error(`Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
Looks up the number of users for each domain in the input file and adds
columns for the number of users in the domain, subdomains, and total.

Options:

    -d, --domain    name of the csv column containing the email domain (default: "Email domain")
    -o, --output    output file (default: /dev/stdout)
    --help          show this message
`)
  process.exit(1)
}
|
|
|
|
// INPUT-FILE is optional: fall back to stdin when it is omitted, mirroring the
// /dev/stdout default of --output. Without the fallback, readFileSync would be
// called with `undefined` and throw a TypeError.
const inputPath = argv._[0] ?? '/dev/stdin'
const input = fs.readFileSync(inputPath, 'utf8')

// `columns: true` makes each record an object keyed by the csv header row.
const records = csv.parse(input, { columns: true })

if (records.length === 0) {
  console.error('No records in input file')
  process.exit(1)
}
|
|
|
|
/**
 * Annotates every parsed csv record with user counts for its domain and
 * writes the augmented csv (including a header row) to `argv.output`.
 *
 * Adds three columns to each record: 'Domain Users', 'Subdomain Users' and
 * 'Total Users'.
 *
 * @returns {Promise<void>}
 * @throws {Error} if a record has no value in the `argv.domain` column
 */
async function main() {
  for (const record of records) {
    const domain = record[argv.domain]
    // Fail early with an actionable message instead of an opaque error from
    // the lookups when the configured column is missing from the input.
    if (typeof domain !== 'string') {
      throw new Error(
        `Column "${argv.domain}" not found in input file (use --domain to select the column)`
      )
    }
    const { domainUserCount, subdomainUserCount } = await getUserCount(domain)
    record['Domain Users'] = domainUserCount
    record['Subdomain Users'] = subdomainUserCount
    record['Total Users'] = domainUserCount + subdomainUserCount
  }
  const output = csv.stringify(records, { header: true })
  fs.writeFileSync(argv.output, output)
}
|
|
|
|
/**
 * Counts the users whose email hostname is exactly `domain` and the users
 * whose email hostname is a subdomain of `domain`.
 *
 * @param {string} domain - email domain to look up, e.g. 'foo.edu'
 * @param {Object} [projection] - projection passed to both lookups; only the
 *   number of results is used, so the default fetches just `_id`
 * @returns {Promise<{domainUserCount: number, subdomainUserCount: number}>}
 */
async function getUserCount(domain, projection = { _id: 1 }) {
  // The two lookups are independent of each other, so run them concurrently
  // rather than one after the other.
  const [domainUsers, subdomainUsers] = await Promise.all([
    UserGetter.promises.getUsersByHostname(domain, projection),
    getUsersByHostnameWithSubdomain(domain, projection),
  ])
  return {
    domainUserCount: domainUsers.length,
    subdomainUserCount: subdomainUsers.length,
  }
}
|
|
|
|
/**
 * Finds users that have an email address on a subdomain of `domain`, excluding
 * the domain itself (for 'foo.edu': matches 'cs.foo.edu', not 'foo.edu').
 *
 * @param {string} domain - parent domain; surrounding whitespace is trimmed
 * @param {Object} projection - projection passed to the users query
 * @returns {Promise<Array>} the matching user documents
 */
async function getUsersByHostnameWithSubdomain(domain, projection) {
  // Hostnames are matched in reversed form ('foo.edu' -> 'ude.oof') so that
  // "is a subdomain of" becomes an anchored prefix match on the
  // `emails.reversedHostname` field.
  const characters = domain.trim().split('')
  characters.reverse()
  // Escape regex metacharacters so that '.' only matches a literal dot.
  const escapedPrefix = _.escapeRegExp(characters.join(''))
  // The trailing literal dot requires at least one more label after the
  // domain, which is what excludes the domain itself.
  const subdomainPattern = `^${escapedPrefix}\\.`

  const users = await db.users
    .find(
      {
        emails: { $exists: true },
        'emails.reversedHostname': { $regex: subdomainPattern },
      },
      { projection }
    )
    .toArray()
  return users
}
|
|
|
|
// Script entry point: run main() and translate the outcome into an exit code.
try {
  await main()
  // Report completion on stderr: --output defaults to /dev/stdout, so logging
  // to stdout here would append "Done" to the generated csv.
  console.error('Done')
  process.exit(0)
} catch (error) {
  console.error(error)
  process.exit(1)
}
|