// The functions below rely on `minimist` and `OnboardingDataCollection`
// being required at module scope.

// Default --registeredBefore cut-off, shared by the help text and the parser
// so the two cannot drift apart.
const DEFAULT_REGISTERED_BEFORE = '2024-02-18'

/**
 * Print the command-line help for this script to stdout.
 */
function usage() {
  console.log(
    `
  Onboarding Data Collection extraction, outputs to /tmp/odc_neverUsedLatex.csv

  Usage:
    node scripts/extract_onboardingdatacollection_never_used_latex.js [--registeredBefore=DATE] [--studentsOnly] [--includeSignUpDate] [--includeCountry] [--includePlanCode]

  Options:
    --help                    Show this screen

    --registeredBefore=DATE   Limit to users registered before ISO 8601 date (eg. 2024-08-01, default ${DEFAULT_REGISTERED_BEFORE})

    --studentsOnly            Only include users whose primary occupation is 'university' or 'school'

    --includeSignUpDate       Include signUpDate column

    --includeCountry          Include country column (country code inferred from institution and possibly missing)

    --includePlanCode         Include planCode column
  `
  )
}

/**
 * Parse the script's command-line flags.
 *
 * Exits the process with status 0 after printing help when --help is given,
 * and with status 1 when --registeredBefore is not a parseable date.
 *
 * @returns {object} the minimist result: the boolean flags `help`,
 *   `studentsOnly`, `includeSignUpDate`, `includeCountry`, `includePlanCode`
 *   and the string `registeredBefore`
 */
function parseArgs() {
  const argv = minimist(process.argv.slice(2), {
    string: ['registeredBefore'],
    // minimist's option is named `boolean`; an unknown key such as `bool` is
    // ignored, which would leave `--flag=false` as the truthy string 'false'.
    boolean: [
      'help',
      'studentsOnly',
      'includeSignUpDate',
      'includeCountry',
      'includePlanCode',
    ],
    default: {
      help: false,
      studentsOnly: false,
      includeSignUpDate: false,
      includeCountry: false,
      includePlanCode: false,
      registeredBefore: DEFAULT_REGISTERED_BEFORE,
    },
  })

  if (argv.help) {
    usage()
    process.exit(0)
  }

  // Fail fast instead of handing an Invalid Date to the database query.
  if (Number.isNaN(new Date(argv.registeredBefore).getTime())) {
    console.error(
      `Invalid --registeredBefore value: '${argv.registeredBefore}'`
    )
    usage()
    process.exit(1)
  }

  return argv
}

/**
 * Collect the ids and institution names of every onboarding record whose
 * `usedLatex` answer is 'never'.
 *
 * @param {object} [options]
 * @param {boolean} [options.studentsOnly] also require
 *   `primaryOccupation === 'university'`
 * @returns {Promise<{userIds: string[], institutionNames: Array}>} two
 *   parallel arrays: entry i of each comes from the same record
 */
async function getUsers({ studentsOnly } = {}) {
  const odcCriteria = { usedLatex: 'never' }
  if (studentsOnly) {
    // NOTE(review): the help text mentions 'university' or 'school' but only
    // 'university' is matched here - confirm whether 'school' is a distinct
    // stored value before widening this filter.
    odcCriteria.primaryOccupation = 'university'
  }

  const cursor = OnboardingDataCollection.find(odcCriteria).cursor()

  const userIds = []
  const institutionNames = []
  let doc = await cursor.next()
  while (doc != null) {
    userIds.push(doc._id.toString())
    institutionNames.push(doc.institutionName)
    doc = await cursor.next()
  }

  return { userIds, institutionNames }
}
/**
 * Look up the subscription plan code for each user, one request at a time.
 *
 * Relies on `SubscriptionLocator` being required at module scope.
 *
 * @param {Array} users values passed as-is to
 *   `SubscriptionLocator.promises.getUsersSubscription`
 * @returns {Promise<string[]>} plan codes, positionally aligned with `users`
 *   (entry i belongs to users[i]); 'free' when no subscription or plan code
 *   is found. Callers must pair the result with this same list, not with a
 *   filtered or re-ordered one.
 */
async function getUserPlanCodes(users) {
  const planCodes = []
  // `users` is a plain array, so a regular for...of is enough.
  for (const user of users) {
    const subscription =
      await SubscriptionLocator.promises.getUsersSubscription(user)
    planCodes.push(subscription?.planCode || 'free')
  }
  return planCodes
}

/**
 * Infer a country code for each institution name via the institutions search
 * endpoint. Inferred from institution so will not always be available or
 * accurate.
 *
 * Relies on `Settings` and `fetchJson` being required at module scope.
 *
 * @param {Array<string|undefined>} institutions institution names; falsy
 *   entries are skipped without a lookup
 * @returns {Promise<Array<string|undefined>>} country codes, positionally
 *   aligned with `institutions`; undefined when the name is empty, the search
 *   returns no match, or the lookup fails
 */
async function getUserCountries(institutions) {
  const countryCodes = []
  // Cache completed lookups to avoid duplicate calls. A Map with `has()`
  // also remembers "no match" results and is immune to names that collide
  // with Object.prototype members (eg. 'constructor').
  const institutionLookups = new Map()
  for (const inst of institutions) {
    if (!inst) {
      countryCodes.push(undefined)
      continue
    }
    if (institutionLookups.has(inst)) {
      countryCodes.push(institutionLookups.get(inst))
      continue
    }
    try {
      const url = `${Settings.apis.web.url}/institutions/search?search=${encodeURIComponent(inst)}&max_results=1`
      const response = await fetchJson(url)
      const countryCode = response?.[0]?.country_code
      institutionLookups.set(inst, countryCode)
      countryCodes.push(countryCode)
    } catch (e) {
      // if institution search fails just move on; failures are not cached so
      // a later occurrence of the same name gets another attempt
      console.log(`Error when looking up institution ${inst}: ${e.message}`)
      countryCodes.push(undefined)
    }
  }
  return countryCodes
}
runScript = async () => { userEmails, { header: true, - columns: ['email', 'signUpDate'], + columns, }, function (err, output) { fs.writeFileSync('/tmp/odc_neverUsedLatex.csv', output)