Merge pull request #22260 from overleaf/rh-odc-students-never-used

Add student filter and country field to 'never used LaTeX' ODC script

GitOrigin-RevId: 9db298a48b7d70b59b81388ff93fc6c9575861d7
This commit is contained in:
roo hutton 2024-12-02 12:32:17 +00:00 committed by Copybot
parent f267350a46
commit 172aeb59d1

View file

@ -1,9 +1,13 @@
const csv = require('csv')
const fs = require('fs')
const minimist = require('minimist')
const {
OnboardingDataCollection,
} = require('../app/src/models/OnboardingDataCollection')
const { User } = require('../app/src/models/User')
const SubscriptionLocator = require('../app/src/Features/Subscription/SubscriptionLocator')
const Settings = require('@overleaf/settings')
const { fetchJson } = require('@overleaf/fetch-utils')
/**
* This script extracts ODC data with some extra fields, and filters on registration date and LaTeX experience
@ -25,9 +29,60 @@ const { User } = require('../app/src/models/User')
* - kubectl cp web-standalone-prod-XXXXX:/tmp/odc_neverUsedLatex.csv ~/odc_neverUsedLatex.csv
*/
const getEmails = async userIds => {
/**
 * Print the command-line help for this script to stdout.
 *
 * The text is emitted with a single console.log call and begins and ends
 * with a blank line.
 */
function usage() {
  const helpLines = [
    '',
    'Onboarding Data Collection extraction, outputs to /tmp/odc_neverUsedLatex.csv',
    'Usage:',
    'node scripts/extract_onboardingdatacollection_never_used_latex.js [--registeredBefore=<date>] [--studentsOnly] [--includeSignUpDate] [--includeCountry] [--includePlanCode]',
    'Options:',
    '--help Show this screen',
    '--registeredBefore=<date> Limit to users registered before ISO 8601 date (eg. 2024-08-01)',
    "--studentsOnly Only include users whose primary occupation is 'university' or 'school'",
    '--includeSignUpDate Include signUpDate column',
    '--includeCountry Include countryCode column (inferred from institution and possibly missing)',
    '--includePlanCode Include planCode column',
    '',
  ]
  console.log(helpLines.join('\n'))
}
/**
 * Parse this script's command-line flags with minimist.
 *
 * When --help is passed, the usage text is printed and the process exits
 * with status 0.
 *
 * @returns {object} the minimist result, carrying the boolean flags `help`,
 *   `studentsOnly`, `includeSignUpDate`, `includeCountry` and
 *   `includePlanCode` (each defaulting to false) and the string
 *   `registeredBefore` (defaulting to '2024-02-18')
 */
function parseArgs() {
  const argv = minimist(process.argv.slice(2), {
    string: ['registeredBefore'],
    // minimist reads the `boolean` key; a `bool` key is ignored, which would
    // leave e.g. `--studentsOnly=false` as the truthy string 'false' and let
    // a flag swallow a following bare word as its value.
    boolean: [
      'help',
      'studentsOnly',
      'includeSignUpDate',
      'includeCountry',
      'includePlanCode',
    ],
    default: {
      help: false,
      studentsOnly: false,
      includeSignUpDate: false,
      includeCountry: false,
      includePlanCode: false,
      registeredBefore: '2024-02-18',
    },
  })
  if (argv.help) {
    usage()
    process.exit(0)
  }
  return argv
}
async function getEmails(userIds, { registeredBefore }) {
const userEmails = await User.find(
{ _id: { $in: userIds }, signUpDate: { $gte: new Date(2024, 1, 18) } },
{ _id: { $in: userIds }, signUpDate: { $lte: new Date(registeredBefore) } },
{ email: 1, signUpDate: 1 }
).exec()
return userEmails.map(({ email, signUpDate }) => ({
@ -36,20 +91,91 @@ const getEmails = async userIds => {
}))
}
const getUsers = async () => {
const cursor = OnboardingDataCollection.find({ usedLatex: 'never' }).cursor()
const userIds = []
for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
userIds.push(doc._id.toString())
/**
 * Collect the ids and institution names of onboarding-data-collection
 * records whose `usedLatex` answer is 'never'.
 *
 * @param {{ studentsOnly: boolean }} options - when `studentsOnly` is truthy,
 *   only records with `primaryOccupation` equal to 'university' are matched
 * @returns {Promise<{ userIds: string[], institutionNames: Array }>} two
 *   parallel arrays in cursor order: the stringified `_id` of each record and
 *   its `institutionName` (pushed as-is, so entries may be undefined)
 */
async function getUsers({ studentsOnly }) {
  const odcCriteria = { usedLatex: 'never' }
  if (studentsOnly) {
    // NOTE(review): the --studentsOnly help text mentions 'university' or
    // 'school', but only 'university' is matched here — confirm which is
    // intended.
    odcCriteria.primaryOccupation = 'university'
  }
  const cursor = OnboardingDataCollection.find(odcCriteria).cursor()
  const userIds = []
  const institutionNames = []
  for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
    userIds.push(doc._id.toString())
    institutionNames.push(doc.institutionName)
  }
  return { userIds, institutionNames }
}
const runScript = async () => {
const users = await getUsers()
const userEmails = await getEmails(users)
/**
 * Resolve a plan code for each entry of `users`, one lookup at a time.
 *
 * @param users - values passed one by one to
 *   SubscriptionLocator.promises.getUsersSubscription
 * @returns {Promise<Array>} one plan code per entry, in input order; 'free'
 *   when no subscription is returned or its planCode is falsy
 */
async function getUserPlanCodes(users) {
  const codes = []
  for await (const userId of users) {
    const found = await SubscriptionLocator.promises.getUsersSubscription(
      userId
    )
    const { planCode } = found ?? {}
    codes.push(planCode || 'free')
  }
  return codes
}
/**
 * Look up a country code for each institution name using the
 * `/institutions/search` endpoint under `Settings.apis.web.url`.
 *
 * The country is inferred from the institution, so it will not always be
 * available or accurate.
 *
 * @param institutions - institution names; falsy entries are not looked up
 * @returns {Promise<Array>} one entry per input, in input order: the
 *   `country_code` of the first search result, or undefined when the name is
 *   falsy, the search has no first result, or the request fails
 */
async function getUserCountries(institutions) {
  const countryCodes = []
  // Cache lookups to avoid duplicate calls. A Map is used rather than a plain
  // object because the names are user-supplied: a key such as 'constructor'
  // would otherwise resolve to an inherited Object.prototype member. Checking
  // `has` (not truthiness) also lets "no country found" results be cached.
  const institutionLookups = new Map()
  for await (const inst of institutions) {
    if (!inst) {
      countryCodes.push(undefined)
      continue
    }
    if (!institutionLookups.has(inst)) {
      try {
        const url = `${Settings.apis.web.url}/institutions/search?search=${encodeURIComponent(inst)}&max_results=1`
        const response = await fetchJson(url)
        institutionLookups.set(inst, response[0]?.country_code)
      } catch (e) {
        // if institution search fails just move on; the failure is not
        // cached, so a later occurrence of the same name is retried
        console.log(`Error when looking up institution ${inst}: ${e.message}`)
        countryCodes.push(undefined)
        continue
      }
    }
    countryCodes.push(institutionLookups.get(inst))
  }
  return countryCodes
}
async function runScript() {
const columns = ['email']
const args = parseArgs()
if (args.includeSignUpDate) {
columns.push('signUpDate')
}
const users = await getUsers(args)
let userEmails = await getEmails(users.userIds, args)
if (args.includePlanCode) {
columns.push('planCode')
const planCodes = await getUserPlanCodes(users.userIds)
userEmails = userEmails.map((user, index) => {
user.planCode = planCodes[index]
return user
})
}
if (args.includeCountry) {
columns.push('country')
const countryCodes = await getUserCountries(users.institutionNames)
userEmails = userEmails.map((user, index) => {
user.country = countryCodes[index]
return user
})
}
console.log('Starting to write to csv file...')
@ -57,7 +183,7 @@ const runScript = async () => {
userEmails,
{
header: true,
columns: ['email', 'signUpDate'],
columns,
},
function (err, output) {
fs.writeFileSync('/tmp/odc_neverUsedLatex.csv', output)