mirror of
https://github.com/overleaf/overleaf.git
synced 2025-01-27 02:11:44 +00:00
Merge pull request #14610 from overleaf/bg-filter-csv-by-registered-user-domains
add script to filter csv by user email domains GitOrigin-RevId: d0faf1fd8ace2ec1bde0ffa5b4595e0894952119
This commit is contained in:
parent
d92dc66e30
commit
601365bcc6
1 changed files with 95 additions and 0 deletions
95
services/web/scripts/add_user_count_to_csv.js
Normal file
95
services/web/scripts/add_user_count_to_csv.js
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Usage: node scripts/add_user_count_to_csv.js [OPTS] [INPUT-FILE]
|
||||
// Looks up the number of users for each domain in the input csv file and adds
|
||||
// columns for the number of users in the domain, subdomains, and total.
|
||||
const fs = require('fs')
|
||||
const csv = require('csv/sync')
|
||||
const minimist = require('minimist')
|
||||
const UserGetter = require('../app/src/Features/User/UserGetter')
|
||||
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
|
||||
const _ = require('lodash')
|
||||
|
||||
// Command-line interface definition: two string options (each with a
// single-letter alias and a default value) and a boolean --help flag.
const cliOptions = {
  boolean: ['help'],
  string: ['domain', 'output'],
  alias: { domain: 'd', output: 'o' },
  default: { domain: 'Email domain', output: '/dev/stdout' },
}

// Parsed arguments; positional arguments end up in argv._
const argv = minimist(process.argv.slice(2), cliOptions)
|
||||
|
||||
// Help text printed (on stderr) when --help is given or when more than one
// positional argument is supplied.
const usageText = `Usage: node scripts/add_user_count_to_csv.js [OPTS] [INPUT-FILE]
Looks up the number of users for each domain in the input file and adds
columns for the number of users in the domain, subdomains, and total.

Options:

--domain name of the csv column containing the email domain (default: "Email domain")
--output output file (default: /dev/stdout)
`

const hasTooManyInputs = argv._.length > 1
if (argv.help || hasTooManyInputs) {
  console.error(usageText)
  process.exit(1)
}
|
||||
|
||||
// Read the whole CSV up front. The usage marks INPUT-FILE as optional, so fall
// back to stdin when no positional argument is given (mirroring the
// /dev/stdout default for --output) instead of handing `undefined` to
// readFileSync, which throws a TypeError.
const inputPath = argv._.length === 0 ? '/dev/stdin' : argv._[0]
const input = fs.readFileSync(inputPath, 'utf8')

// `columns: true` asks the parser to use the first line as column names, so
// each record is an object keyed by header.
const records = csv.parse(input, { columns: true })

// Nothing to annotate: report it and exit with a failure status.
if (records.length === 0) {
  console.error('No records in input file')
  process.exit(1)
}
|
||||
|
||||
/**
 * Annotates every input record with user counts for its email domain and
 * writes the resulting CSV (with a header row) to the --output path.
 *
 * Three columns are added to each record: 'Domain Users', 'Subdomain Users'
 * and 'Total Users' (the sum of the other two).
 *
 * @returns {Promise<void>}
 * @throws {Error} if any record has no value in the --domain column
 */
async function main() {
  // Fail fast, before touching the database, when the configured column is
  // absent (e.g. a misspelled --domain). Otherwise the lookup would crash
  // later with an opaque "cannot read properties of undefined" TypeError.
  for (const record of records) {
    if (typeof record[argv.domain] !== 'string') {
      throw new Error(
        `Column "${argv.domain}" not found in input file (use --domain to select the email domain column)`
      )
    }
  }

  await waitForDb()

  // Records are processed one at a time and updated in place.
  for (const record of records) {
    const domain = record[argv.domain]
    const { domainUserCount, subdomainUserCount } = await getUserCount(domain)
    record['Domain Users'] = domainUserCount
    record['Subdomain Users'] = subdomainUserCount
    record['Total Users'] = domainUserCount + subdomainUserCount
  }

  const output = csv.stringify(records, { header: true })
  fs.writeFileSync(argv.output, output)
}
|
||||
|
||||
/**
 * Counts the users found for a domain and for its subdomains.
 *
 * The domain lookup goes through UserGetter.getUsersByHostname and the
 * subdomain lookup through getUsersByHostnameWithSubdomain. The two queries
 * are independent, so they are issued concurrently rather than one after the
 * other. Only `_id` is projected because just the number of results is used.
 *
 * @param {string} domain - email domain to look up, e.g. 'foo.edu'
 * @returns {Promise<{domainUserCount: number, subdomainUserCount: number}>}
 */
async function getUserCount(domain) {
  const [domainUsers, subdomainUsers] = await Promise.all([
    UserGetter.promises.getUsersByHostname(domain, { _id: 1 }),
    getUsersByHostnameWithSubdomain(domain, { _id: 1 }),
  ])
  return {
    domainUserCount: domainUsers.length,
    subdomainUserCount: subdomainUsers.length,
  }
}
|
||||
|
||||
/**
 * Finds users having an email address in a subdomain of `domain`, excluding
 * addresses on the domain itself: for 'foo.edu' this matches 'cs.foo.edu' but
 * not 'foo.edu'.
 *
 * @param {string} domain - email domain, surrounding whitespace is ignored
 * @param {object} projection - MongoDB projection applied to the results
 * @returns {Promise<Array<object>>} the matching user documents
 */
async function getUsersByHostnameWithSubdomain(domain, projection) {
  // The lookup runs against the reversed hostname, so a subdomain match
  // becomes an anchored prefix match, e.g. 'foo.edu' -> 'ude.oof'.
  const characters = domain.trim().split('')
  characters.reverse()
  // Escape the domain so that '.' only matches a literal dot.
  const escapedReversedDomain = _.escapeRegExp(characters.join(''))
  // The trailing literal dot requires at least one further label, which is
  // what excludes the domain itself.
  const subdomainPattern = `^${escapedReversedDomain}\\.`

  const cursor = db.users.find(
    {
      emails: { $exists: true },
      'emails.reversedHostname': { $regex: subdomainPattern },
    },
    { projection }
  )
  return cursor.toArray()
}
|
||||
|
||||
// Script entry point: run main(), then exit with 0 on success or log the
// error and exit with 1 on failure. Status messages go to stderr.
function onSuccess() {
  console.error('Done')
  process.exit(0)
}

function onFailure(err) {
  console.error(err)
  process.exit(1)
}

main().then(onSuccess).catch(onFailure)
|
Loading…
Reference in a new issue