report/sync on mongo/postgres entitlement mismatches

GitOrigin-RevId: 71258d5d7ca1b6700d70546f2c3be672f5524bef
This commit is contained in:
Ersun Warncke 2020-03-30 08:55:47 -04:00 committed by Copybot
parent be7a1abb13
commit 9d12d98855
2 changed files with 240 additions and 0 deletions

View file

@ -0,0 +1,40 @@
# Sync User Entitlements
Entitlement information for institutional (university) sso users is stored in
both the mongo users collection and the postgres v2_user_universities table.
The mongo users collection is authoritative but these need to be in sync for
everything to work properly.
This script takes exports from both mongo and postgres, finds mismatches, and
then corrects the data in postgres so that it matches mongo.
## Exporting users data from mongo
Follow the directions in `google-ops/README.md` for exporting data from mongo
and copying the files to your local machine.
Run the following user export command.
```
mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection users --fields '_id,email,emails,samlIdentifiers' --query '{"samlIdentifiers.providerId": {"$exists": 1}}' --out user-entitlements.json
```
**Note: this file contains PII and caution must be exercised to ensure that it
is never transferred or stored insecurely and that it is deleted ASAP**
## Exporting data from postgres
Connect to postgres by running `heroku psql -a electric-leaf-4093`
Run the following v2_user_universities export command.
```
\copy (select uu.user_id, uu.email, uu.cached_entitlement, ud.university_id from v2_user_universities uu LEFT JOIN university_domains ud ON uu.university_domain_id = ud.id) to 'cached-entitlements.csv' with csv;
```
**Note: this file contains PII and caution must be exercised to ensure that it
is never transferred or stored insecurely and that it is deleted ASAP**
## Run sync
```
node scripts/sync-user-entitlements/sync-user-entitlements --user-entitlements user-entitlements.json --cached-entitlements cached-entitlements.csv --commit
```

View file

@ -0,0 +1,200 @@
'use strict'
const fs = require('fs')
const minimist = require('minimist')
const InstitutionsAPI = require('../../app/src/Features/Institutions/InstitutionsAPI')
.promises
// Script entry point: parse the command line, load both exports and run the
// sync.
const argv = minimist(process.argv.slice(2))
// changes are only sent to the institutions API when --commit is passed;
// without it the script only reports the mismatches it finds
const commit = argv.commit !== undefined
// with --ignore-nulls a cached entitlement that is not 't' is loaded as false
// instead of distinguishing 'f' (false) from any other value (null)
const ignoreNulls = !!argv['ignore-nulls']
if (!commit) {
  console.log('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
}
const userEntitlements = loadUserEntitlements(argv['user-entitlements'])
const cachedEntitlements = loadCachedEntitlements(argv['cached-entitlements'])
// exit with a non-zero status when the sync fails so that the failure is
// visible to whatever invoked the script
syncUserEntitlements(userEntitlements, cachedEntitlements)
  .then(() => process.exit(0))
  .catch(err => {
    console.error(err.stack)
    process.exit(1)
  })
/**
 * Compare the entitlements exported from mongo with the cached entitlements
 * exported from postgres, log every mismatch that is found and sync the
 * cached value so that it matches mongo.
 *
 * @param {Object} userEntitlements - mongo export keyed by user id; every
 *   value has `userId`, `emails` and `samlIdentifiers`
 * @param {Object} cachedEntitlements - postgres export keyed by email; every
 *   value has `userId`, `email`, `hasEntitlement` and `providerId`
 * @returns {Promise<void>}
 */
async function syncUserEntitlements(userEntitlements, cachedEntitlements) {
  // userId/email pairs synced by the first pass; the second pass skips them so
  // that a single mismatch is not reported and synced twice
  const alreadySynced = new Set()
  const syncKey = (userId, email) => `${userId}:${email}`

  // check for user entitlements in mongo but not in postgres
  for (const userEntitlement of Object.values(userEntitlements)) {
    const { userId } = userEntitlement
    // find any email(s) that are linked through sso
    for (const email of userEntitlement.emails) {
      if (!email.samlProviderId) {
        continue
      }
      // get samlIdentifiers entry for email
      const samlIdentifier = userEntitlement.samlIdentifiers.find(
        identifier => identifier.providerId === email.samlProviderId
      )
      // if identifier is missing for email this is internal inconsistency in
      // mongo
      if (!samlIdentifier) {
        console.log(`missing samlIdentifier for user ${userId}`)
        continue
      }
      const cachedEntitlement = cachedEntitlements[email.email]
      if (!cachedEntitlement) {
        // there is no record in postgres at all
        console.log(`missing cached entitlement for user ${userId}`)
      } else if (
        cachedEntitlement.hasEntitlement !== samlIdentifier.hasEntitlement
      ) {
        // the record exists but holds the wrong value
        console.log(
          `cached entitlement mismatch for user ${userId} mongo(${
            samlIdentifier.hasEntitlement
          }) postgres(${cachedEntitlement.hasEntitlement})`
        )
      } else {
        // the cached record is correct
        continue
      }
      alreadySynced.add(syncKey(userId, email.email))
      await syncUserEntitlement(
        userId,
        email.email,
        samlIdentifier.hasEntitlement
      )
    }
    // find any samlIdentifier records missing email entry
    for (const samlIdentifier of userEntitlement.samlIdentifiers) {
      const hasEmail = userEntitlement.emails.some(
        email => email.samlProviderId === samlIdentifier.providerId
      )
      if (!hasEmail) {
        console.log(
          `missing email entry for samlIdentifier for user ${userId}`
        )
      }
    }
  }

  // check for user entitlements in postgres but not in mongo
  for (const cachedEntitlement of Object.values(cachedEntitlements)) {
    if (!cachedEntitlement.hasEntitlement) {
      continue
    }
    const userEntitlement = userEntitlements[cachedEntitlement.userId]
    // if the record does not exist it is probably because users without
    // entitlements were not exported
    if (!userEntitlement) {
      console.log(
        `missing cached entitlement in mongo for user ${
          cachedEntitlement.userId
        }`
      )
      continue
    }
    // validate that mongo has correct entitlement for the provider
    const samlIdentifier = userEntitlement.samlIdentifiers.find(
      identifier => identifier.providerId === cachedEntitlement.providerId
    )
    if (samlIdentifier && samlIdentifier.hasEntitlement) {
      continue
    }
    if (
      alreadySynced.has(
        syncKey(userEntitlement.userId, cachedEntitlement.email)
      )
    ) {
      continue
    }
    console.log(
      `cached entitlement mismatch for user ${
        userEntitlement.userId
      } mongo(false) postgres(true)`
    )
    await syncUserEntitlement(
      userEntitlement.userId,
      cachedEntitlement.email,
      false
    )
  }
}
/**
 * Add or remove the entitlement for one user email through the institutions
 * API. Does nothing on a dry run (when --commit was not passed). API errors
 * are logged and not rethrown so that the remaining records are still
 * processed.
 *
 * @param {string} userId
 * @param {string} email
 * @param {boolean} hasEntitlement - truthy adds the entitlement, falsy
 *   removes it
 * @returns {Promise<void>}
 */
async function syncUserEntitlement(userId, email, hasEntitlement) {
  if (commit) {
    const operation = hasEntitlement ? 'addEntitlement' : 'removeEntitlement'
    try {
      await InstitutionsAPI[operation](userId, email)
    } catch (err) {
      const details = `${userId}, ${email}, ${hasEntitlement} - ${err.message}`
      console.error(`error setting entitlement: ${details}`)
    }
  }
}
/**
 * Read the mongo users export, which holds one JSON document per line, and
 * index the documents by user id.
 *
 * @param {string} userEntitlementsFilename - path of the export file
 * @returns {Object} exported users keyed by user id; in each user the `_id`
 *   field is replaced by a `userId` field holding the value of `_id.$oid`
 */
function loadUserEntitlements(userEntitlementsFilename) {
  const exportContents = fs.readFileSync(userEntitlementsFilename, {
    encoding: 'utf8'
  })
  return exportContents
    .split('\n')
    .filter(line => line !== '')
    .reduce((usersById, line) => {
      const { _id, ...exportedFields } = JSON.parse(line)
      const userId = _id.$oid
      usersById[userId] = { ...exportedFields, userId }
      return usersById
    }, {})
}
/**
 * Read the postgres csv export of cached entitlements and index the rows by
 * email. The columns are read in the order user id, email, cached
 * entitlement, provider id.
 *
 * The cached entitlement column is 't' or 'f'; any other value is loaded as
 * null, or as false when --ignore-nulls was passed.
 *
 * @param {string} cachedEntitlementsFilename - path of the csv export
 * @returns {Object} rows keyed by email, each with `email`, `hasEntitlement`,
 *   `providerId` and `userId`
 */
function loadCachedEntitlements(cachedEntitlementsFilename) {
  const cachedEntitlementsFile = fs.readFileSync(cachedEntitlementsFilename, {
    encoding: 'utf8'
  })
  const cachedEntitlements = {}
  // accept both \n and \r\n so that a carriage return never ends up in the
  // last column
  for (const cachedEntitlementLine of cachedEntitlementsFile.split(/\r?\n/)) {
    // skip blank lines (e.g. the trailing newline of the export) which would
    // otherwise be stored as a bogus record without an email
    if (!cachedEntitlementLine) {
      continue
    }
    // this is safe because comma is not an allowed value for any column
    const [
      userId,
      email,
      hasEntitlement,
      providerId
    ] = cachedEntitlementLine.split(',')
    let hasEntitlementBoolean
    if (ignoreNulls) {
      hasEntitlementBoolean = hasEntitlement === 't'
    } else {
      hasEntitlementBoolean =
        hasEntitlement === 't' ? true : hasEntitlement === 'f' ? false : null
    }
    cachedEntitlements[email] = {
      email,
      hasEntitlement: hasEntitlementBoolean,
      providerId,
      userId
    }
  }
  return cachedEntitlements
}