mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-22 02:18:27 +00:00
Merge pull request #22260 from overleaf/rh-odc-students-never-used
Add student filter and country field to 'never used LaTeX' ODC script GitOrigin-RevId: 9db298a48b7d70b59b81388ff93fc6c9575861d7
This commit is contained in:
parent
f267350a46
commit
172aeb59d1
1 changed files with 139 additions and 13 deletions
|
@ -1,9 +1,13 @@
|
|||
const csv = require('csv')
|
||||
const fs = require('fs')
|
||||
const minimist = require('minimist')
|
||||
const {
|
||||
OnboardingDataCollection,
|
||||
} = require('../app/src/models/OnboardingDataCollection')
|
||||
const { User } = require('../app/src/models/User')
|
||||
const SubscriptionLocator = require('../app/src/Features/Subscription/SubscriptionLocator')
|
||||
const Settings = require('@overleaf/settings')
|
||||
const { fetchJson } = require('@overleaf/fetch-utils')
|
||||
|
||||
/**
|
||||
* This script extracts ODC data with some extra fields, and filters on registration date and LaTeX experience
|
||||
|
@ -25,9 +29,60 @@ const { User } = require('../app/src/models/User')
|
|||
* - kubectl cp web-standalone-prod-XXXXX:/tmp/odc_neverUsedLatex.csv ~/odc_neverUsedLatex.csv
|
||||
*/
|
||||
|
||||
const getEmails = async userIds => {
|
||||
/**
 * Print command-line help for this script to stdout.
 *
 * The option descriptions are kept in line with what the script actually does:
 * the student filter matches a primary occupation of 'university', and the
 * country flag adds a column named `country`.
 */
function usage() {
  console.log(
    `
Onboarding Data Collection extraction, outputs to /tmp/odc_neverUsedLatex.csv

Usage:
  node scripts/extract_onboardingdatacollection_never_used_latex.js [--registeredBefore=<date>] [--studentsOnly] [--includeSignUpDate] [--includeCountry] [--includePlanCode]

Options:
  --help                      Show this screen

  --registeredBefore=<date>   Limit to users registered before ISO 8601 date (eg. 2024-08-01)

  --studentsOnly              Only include users whose primary occupation is 'university'

  --includeSignUpDate         Include signUpDate column

  --includeCountry            Include country column (inferred from institution and possibly missing)

  --includePlanCode           Include planCode column
`
  )
}
|
||||
|
||||
/**
 * Parse the script's command-line flags with minimist.
 *
 * Prints usage and exits with status 0 when --help is given, and exits with
 * status 1 when --registeredBefore cannot be parsed as a date.
 *
 * @returns {object} the minimist argv object (flags plus the `_` positional list)
 */
function parseArgs() {
  const argv = minimist(process.argv.slice(2), {
    string: ['registeredBefore'],
    // minimist's option key is `boolean`; an unrecognised key such as `bool`
    // is ignored, which would leave these flags untyped (eg. `--flag false`
    // would yield the truthy string 'false').
    boolean: [
      'help',
      'studentsOnly',
      'includeSignUpDate',
      'includeCountry',
      'includePlanCode',
    ],
    default: {
      help: false,
      studentsOnly: false,
      includeSignUpDate: false,
      includeCountry: false,
      includePlanCode: false,
      registeredBefore: '2024-02-18',
    },
  })

  if (argv.help) {
    usage()
    process.exit(0)
  }

  // Fail early instead of sending an Invalid Date into the database query
  if (Number.isNaN(new Date(argv.registeredBefore).getTime())) {
    console.error(`Invalid --registeredBefore date: ${argv.registeredBefore}`)
    usage()
    process.exit(1)
  }

  return argv
}
|
||||
|
||||
async function getEmails(userIds, { registeredBefore }) {
|
||||
const userEmails = await User.find(
|
||||
{ _id: { $in: userIds }, signUpDate: { $gte: new Date(2024, 1, 18) } },
|
||||
{ _id: { $in: userIds }, signUpDate: { $lte: new Date(registeredBefore) } },
|
||||
{ email: 1, signUpDate: 1 }
|
||||
).exec()
|
||||
return userEmails.map(({ email, signUpDate }) => ({
|
||||
|
@ -36,20 +91,91 @@ const getEmails = async userIds => {
|
|||
}))
|
||||
}
|
||||
|
||||
const getUsers = async () => {
|
||||
const cursor = OnboardingDataCollection.find({ usedLatex: 'never' }).cursor()
|
||||
|
||||
const userIds = []
|
||||
for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
|
||||
userIds.push(doc._id.toString())
|
||||
/**
 * Collect the ids and institution names of every OnboardingDataCollection
 * record whose `usedLatex` answer is 'never'.
 *
 * When `studentsOnly` is set, only records with a primaryOccupation of
 * 'university' are matched.
 * NOTE(review): confirm whether other student occupations (eg. 'school')
 * should also match this filter.
 *
 * @param {object} options
 * @param {boolean} options.studentsOnly - restrict to the student filter above
 * @returns {Promise<{userIds: string[], institutionNames: Array}>} two parallel
 *   arrays: index i of each refers to the same record
 */
async function getUsers({ studentsOnly }) {
  const criteria = studentsOnly
    ? { usedLatex: 'never', primaryOccupation: 'university' }
    : { usedLatex: 'never' }

  const cursor = OnboardingDataCollection.find(criteria).cursor()

  const userIds = []
  const institutionNames = []

  // Walk the cursor one document at a time until it is exhausted
  let record = await cursor.next()
  while (record != null) {
    userIds.push(record._id.toString())
    institutionNames.push(record.institutionName)
    record = await cursor.next()
  }

  return { userIds, institutionNames }
}
|
||||
|
||||
const runScript = async () => {
|
||||
const users = await getUsers()
|
||||
const userEmails = await getEmails(users)
|
||||
/**
 * Look up the subscription plan code for each user, one at a time.
 *
 * @param {Array} users - user ids, passed as-is to
 *   SubscriptionLocator.promises.getUsersSubscription
 * @returns {Promise<string[]>} plan codes in the same order as `users`;
 *   'free' when no subscription (or no plan code) is found
 */
async function getUserPlanCodes(users) {
  const planCodes = []
  // `users` is a plain array, so a regular for...of is enough; the lookups are
  // still awaited sequentially.
  for (const user of users) {
    const subscription =
      await SubscriptionLocator.promises.getUsersSubscription(user)
    planCodes.push(subscription?.planCode || 'free')
  }
  return planCodes
}
|
||||
|
||||
// inferred from institution so will not always be available or accurate
/**
 * Resolve a country code for each institution name via the web API's
 * institution search endpoint.
 *
 * @param {Array<string|undefined>} institutions - institution names; falsy
 *   entries are not looked up
 * @returns {Promise<Array>} one entry per input, in the same order: the
 *   `country_code` of the first search result, or undefined when the name is
 *   missing, nothing matched, or the lookup failed
 */
async function getUserCountries(institutions) {
  const countryCodes = []
  // cache any institutions we lookup to avoid making duplicate calls.
  // A Map checked with has() also caches "no country found" results, and is
  // safe for names that collide with Object.prototype keys (eg. "constructor").
  const institutionLookups = new Map()
  for (const inst of institutions) {
    if (!inst) {
      countryCodes.push(undefined)
      continue
    }
    if (institutionLookups.has(inst)) {
      countryCodes.push(institutionLookups.get(inst))
      continue
    }
    try {
      const url = `${Settings.apis.web.url}/institutions/search?search=${encodeURIComponent(inst)}&max_results=1`
      const response = await fetchJson(url)
      const countryCode = response[0]?.country_code
      institutionLookups.set(inst, countryCode)
      countryCodes.push(countryCode)
    } catch (e) {
      // if institution search fails just move on; failures are not cached so a
      // later occurrence of the same institution is retried
      console.log(`Error when looking up institution ${inst}: ${e.message}`)
      countryCodes.push(undefined)
    }
  }
  return countryCodes
}
|
||||
|
||||
async function runScript() {
|
||||
const columns = ['email']
|
||||
|
||||
const args = parseArgs()
|
||||
|
||||
if (args.includeSignUpDate) {
|
||||
columns.push('signUpDate')
|
||||
}
|
||||
|
||||
const users = await getUsers(args)
|
||||
let userEmails = await getEmails(users.userIds, args)
|
||||
|
||||
if (args.includePlanCode) {
|
||||
columns.push('planCode')
|
||||
const planCodes = await getUserPlanCodes(users.userIds)
|
||||
userEmails = userEmails.map((user, index) => {
|
||||
user.planCode = planCodes[index]
|
||||
return user
|
||||
})
|
||||
}
|
||||
|
||||
if (args.includeCountry) {
|
||||
columns.push('country')
|
||||
const countryCodes = await getUserCountries(users.institutionNames)
|
||||
userEmails = userEmails.map((user, index) => {
|
||||
user.country = countryCodes[index]
|
||||
return user
|
||||
})
|
||||
}
|
||||
|
||||
console.log('Starting to write to csv file...')
|
||||
|
||||
|
@ -57,7 +183,7 @@ const runScript = async () => {
|
|||
userEmails,
|
||||
{
|
||||
header: true,
|
||||
columns: ['email', 'signUpDate'],
|
||||
columns,
|
||||
},
|
||||
function (err, output) {
|
||||
fs.writeFileSync('/tmp/odc_neverUsedLatex.csv', output)
|
||||
|
|
Loading…
Add table
Reference in a new issue