overleaf/services/web/scripts/extract_onboardingdatacollection_csv.js
Antoine Clausse 8b670f4378 Create a script to extract OnboardingDataCollection (#17657)
* Create a script to extract OnboardingDataCollection

* Add `userId` to the CSV

* Add `firstName` and `lastName` booleans to the CSV

GitOrigin-RevId: ad1c8b9f88358553651f55563dad597767ab7a88
2024-03-28 09:04:33 +00:00

96 lines
2.6 KiB
JavaScript

const csv = require('csv')
const fs = require('fs')
const {
OnboardingDataCollection,
} = require('../app/src/models/OnboardingDataCollection')
/**
 * This script extracts the OnboardingDataCollection collection from the database
 * and writes it to a CSV file.
 *
 * Usage:
 *   - Locally:
 *     - docker compose exec web bash
 *     - node services/web/scripts/extract_onboardingdatacollection_csv.js
 *   - On the server:
 *     - rake connect:app[staging,web]
 *     - node web/scripts/extract_onboardingdatacollection_csv.js
 *     - exit
 *     - kubectl cp web-standalone-prod-XXXXX:/tmp/onboardingDataCollection.csv ~/onboardingDataCollection.csv
 *
 */
/**
 * Build the CSV row object for a single onboarding document.
 *
 * The questionnaire answers are copied through unchanged. `updatedAt` is
 * rendered as an ISO-8601 string, `userId` is the stringified `_id`, and
 * `firstName` / `lastName` are reduced to the strings 'true' / 'false' that
 * say whether a value is present, so the names themselves are not exported.
 *
 * @param {object} doc - document read from OnboardingDataCollection
 * @returns {object} row keyed by CSV column name
 * @throws {RangeError} if `doc.updatedAt` cannot be converted to a valid date
 */
const mapFields = doc => {
  // Fields that are copied to the row as-is, in column order.
  const passThroughKeys = [
    'primaryOccupation',
    'usedLatex',
    'companyDivisionDepartment',
    'companyJobTitle',
    'governmentJobTitle',
    'institutionName',
    'otherJobTitle',
    'nonprofitDivisionDepartment',
    'nonprofitJobTitle',
    'role',
    'subjectArea',
  ]

  const row = {}
  for (const key of passThroughKeys) {
    row[key] = doc[key]
  }

  const updatedAt = new Date(doc.updatedAt)
  row.updatedAt = updatedAt.toISOString()
  row.userId = doc._id.toString() // _id is set as the userId
  row.firstName = Boolean(doc.firstName).toString()
  row.lastName = Boolean(doc.lastName).toString()
  return row
}
/**
 * Stream every OnboardingDataCollection document into
 * /tmp/onboardingDataCollection.csv, one CSV row per document.
 *
 * Documents are read one at a time from a query cursor, mapped with
 * `mapFields`, and pushed through a CSV stringifier piped into a file stream.
 * On success the line count and elapsed time are logged and the process exits.
 *
 * @returns {Promise<void>} rejects if the database cursor, the CSV stringifier
 *   or the file stream fails, so the caller can report the failure.
 */
const runScript = async () => {
  console.time('CSV Writing Duration')
  console.log('Starting to write to csv file...')

  const cursor = OnboardingDataCollection.find().cursor()
  const csvWriter = csv.stringify({
    header: true,
    columns: [
      'primaryOccupation',
      'usedLatex',
      'companyDivisionDepartment',
      'companyJobTitle',
      'governmentJobTitle',
      'institutionName',
      'otherJobTitle',
      'nonprofitDivisionDepartment',
      'nonprofitJobTitle',
      'role',
      'subjectArea',
      'updatedAt',
      'userId',
      'firstName',
      'lastName',
    ],
  })
  const writeStream = fs.createWriteStream('/tmp/onboardingDataCollection.csv')

  // Register the listeners before any data flows, so that an early failure
  // (for example the output file cannot be opened) is not an unhandled
  // 'error' event. The promise settles when the file is fully flushed or when
  // either stream fails.
  const finished = new Promise((resolve, reject) => {
    writeStream.on('finish', resolve)
    writeStream.on('error', err => {
      console.error('Write Stream Error:', err)
      reject(err)
    })
    csvWriter.on('error', err => {
      console.error('CSV Writer Error:', err)
      reject(err)
    })
  })
  // The rejection is observed by the awaits below; this no-op handler only
  // stops it being reported as unhandled while the loop awaits the cursor.
  finished.catch(() => {})

  csvWriter.pipe(writeStream)

  let lineCount = 0
  for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
    lineCount++
    const canAcceptMore = csvWriter.write(mapFields(doc))
    if (!canAcceptMore) {
      // Respect backpressure: pause reading until the stringifier has drained
      // instead of buffering the rest of the collection in memory. Racing
      // against `finished` aborts the wait if a stream has already failed.
      await Promise.race([
        new Promise(resolve => csvWriter.once('drain', resolve)),
        finished,
      ])
    }
  }
  csvWriter.end()

  // Wait for the file stream to flush everything (or fail) before reporting.
  await finished

  console.log(`Done writing to csv file. Total lines written: ${lineCount}`)
  console.timeEnd('CSV Writing Duration')
  // Explicit exit kept from the original; presumably the open database
  // connection would otherwise keep the process alive (TODO confirm).
  process.exit()
}
// Script entry point: start the export and, if it rejects, log the error and
// exit with a non-zero status.
const exitWithFailure = error => {
  console.error(error)
  process.exit(1)
}
runScript().catch(exitWithFailure)