From 9d12d98855a4a4bd25c4c7b543c5e48d5dbb0f96 Mon Sep 17 00:00:00 2001 From: Ersun Warncke Date: Mon, 30 Mar 2020 08:55:47 -0400 Subject: [PATCH] report/sync on mongo/postgres entitlment mismatches GitOrigin-RevId: 71258d5d7ca1b6700d70546f2c3be672f5524bef --- .../scripts/sync-user-entitlements/README.md | 40 ++++ .../sync-user-entitlements.js | 200 ++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 services/web/scripts/sync-user-entitlements/README.md create mode 100644 services/web/scripts/sync-user-entitlements/sync-user-entitlements.js diff --git a/services/web/scripts/sync-user-entitlements/README.md b/services/web/scripts/sync-user-entitlements/README.md new file mode 100644 index 0000000000..6d0379846b --- /dev/null +++ b/services/web/scripts/sync-user-entitlements/README.md @@ -0,0 +1,40 @@ +# Sync User Entitlements + +Entitlement information for institutional (university) SSO users is stored in +both the mongo users collection and the postgres v2_user_universities table. +The mongo users collection is authoritative but these need to be in sync for +everything to work properly. + +This script takes exports from both mongo and postgres, finds mismatches, and +then corrects the data in postgres so that it matches mongo. + +## Exporting users data from mongo + +Follow the directions in `google-ops/README.md` for exporting data from mongo +and copying the files to your local machine. + +Run the following user export command. 
// Sync User Entitlements
//
// Compares a mongo export of SSO-linked users with a postgres export of
// v2_user_universities, reports every mismatch and, only when --commit is
// passed, calls InstitutionsAPI so that the cached entitlement matches mongo
// (mongo is the authoritative side).
//
// Producing the input files
// -------------------------
// 1. Mongo users export (one JSON document per line):
//
//      mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection users --fields '_id,email,emails,samlIdentifiers' --query '{"samlIdentifiers.providerId": {"$exists": 1}}' --out user-entitlements.json
//
// 2. Postgres v2_user_universities export. Connect with
//    `heroku psql -a electric-leaf-4093`, then run:
//
//      \copy (select uu.user_id, uu.email, uu.cached_entitlement, ud.university_id from v2_user_universities uu LEFT JOIN university_domains ud ON uu.university_domain_id = ud.id) to 'cached-entitlements.csv' with csv;
//
// NOTE: both files contain PII. Caution must be exercised to ensure that they
// are never transferred or stored insecurely and that they are deleted ASAP.
//
// Running the sync
// ----------------
//      node scripts/sync-user-entitlements/sync-user-entitlements --user-entitlements user-entitlements.json --cached-entitlements cached-entitlements.csv --commit
//
// Flags:
//   --commit        apply the changes (without it the script is a dry run)
//   --ignore-nulls  treat a NULL cached_entitlement in postgres as false
'use strict'

const fs = require('fs')
const minimist = require('minimist')

const InstitutionsAPI = require('../../app/src/Features/Institutions/InstitutionsAPI')
  .promises

const argv = minimist(process.argv.slice(2))
// Only an affirmative --commit enables writes. minimist parses `--no-commit`
// as `false` and `--commit=false` as the string 'false'; neither may be
// mistaken for a request to write.
const commit = !!argv.commit && argv.commit !== 'false'
const ignoreNulls = !!argv['ignore-nulls']

const userEntitlementsFilename = argv['user-entitlements']
const cachedEntitlementsFilename = argv['cached-entitlements']

// Fail early with a usage message instead of an opaque fs error when a
// filename flag is missing (minimist yields undefined or `true` in that case).
if (
  typeof userEntitlementsFilename !== 'string' ||
  typeof cachedEntitlementsFilename !== 'string'
) {
  console.error(
    'usage: node sync-user-entitlements --user-entitlements <users.json> --cached-entitlements <cached.csv> [--ignore-nulls] [--commit]'
  )
  process.exit(1)
}

if (!commit) {
  console.log('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
}

const userEntitlements = loadUserEntitlements(userEntitlementsFilename)
const cachedEntitlements = loadCachedEntitlements(cachedEntitlementsFilename)

// Exit explicitly once the run settles (presumably something loaded through
// InstitutionsAPI would otherwise keep the process alive - TODO confirm) and
// make a failed run visible to the caller through a non-zero exit code.
syncUserEntitlements(userEntitlements, cachedEntitlements)
  .then(() => process.exit(0))
  .catch(err => {
    console.error(err.stack)
    process.exit(1)
  })

/**
 * Report, and in commit mode repair, every difference between the mongo and
 * postgres exports. Mongo-side records are checked first, then postgres-side
 * records that claim an entitlement.
 *
 * @param {Object} userEntitlements - mongo users keyed by user id, as returned
 *   by loadUserEntitlements
 * @param {Object} cachedEntitlements - postgres rows keyed by email, as
 *   returned by loadCachedEntitlements
 * @returns {Promise<void>}
 */
async function syncUserEntitlements(userEntitlements, cachedEntitlements) {
  await syncFromUserEntitlements(userEntitlements, cachedEntitlements)
  await syncFromCachedEntitlements(userEntitlements, cachedEntitlements)
}

// Checks every SSO-linked email in mongo against the postgres cache and
// reports samlIdentifiers / emails entries that do not reference each other.
async function syncFromUserEntitlements(userEntitlements, cachedEntitlements) {
  for (const userEntitlement of Object.values(userEntitlements)) {
    const userId = userEntitlement.userId
    // tolerate documents where either array is absent from the export
    const emails = userEntitlement.emails || []
    const samlIdentifiers = userEntitlement.samlIdentifiers || []

    for (const email of emails) {
      // only emails linked through sso carry an entitlement
      if (!email.samlProviderId) {
        continue
      }
      const samlIdentifier = samlIdentifiers.find(
        identifier => identifier.providerId === email.samlProviderId
      )
      // a linked email without identifier is an inconsistency inside mongo
      // itself; there is nothing to copy to postgres
      if (!samlIdentifier) {
        console.log(`missing samlIdentifier for user ${userId}`)
        continue
      }

      const cachedEntitlement = cachedEntitlements[email.email]
      if (!cachedEntitlement) {
        // there is no record in postgres at all
        console.log(`missing cached entitlement for user ${userId}`)
      } else if (
        cachedEntitlement.hasEntitlement !== samlIdentifier.hasEntitlement
      ) {
        console.log(
          `cached entitlement mismatch for user ${userId} mongo(${
            samlIdentifier.hasEntitlement
          }) postgres(${cachedEntitlement.hasEntitlement})`
        )
      } else {
        // record exists and agrees with mongo
        continue
      }
      await syncUserEntitlement(
        userId,
        email.email,
        samlIdentifier.hasEntitlement
      )
    }

    // report any samlIdentifier that no email entry points at
    for (const samlIdentifier of samlIdentifiers) {
      const hasEmail = emails.some(
        email => email.samlProviderId === samlIdentifier.providerId
      )
      if (!hasEmail) {
        console.log(
          `missing email entry for samlIdentifier for user ${userId}`
        )
      }
    }
  }
}

// Checks every postgres row that claims an entitlement against mongo and
// removes the cached entitlement when mongo does not grant it.
async function syncFromCachedEntitlements(
  userEntitlements,
  cachedEntitlements
) {
  for (const cachedEntitlement of Object.values(cachedEntitlements)) {
    if (!cachedEntitlement.hasEntitlement) {
      continue
    }
    const userEntitlement = userEntitlements[cachedEntitlement.userId]
    // if the record does not exist it is probably because users without
    // entitlements were not exported
    if (!userEntitlement) {
      console.log(
        `missing cached entitlement in mongo for user ${
          cachedEntitlement.userId
        }`
      )
      continue
    }
    // CSV columns are always strings, so compare the provider id as a string
    const samlIdentifier = (userEntitlement.samlIdentifiers || []).find(
      identifier =>
        String(identifier.providerId) === cachedEntitlement.providerId
    )
    if (!samlIdentifier || !samlIdentifier.hasEntitlement) {
      console.log(
        `cached entitlement mismatch for user ${
          userEntitlement.userId
        } mongo(false) postgres(true)`
      )
      await syncUserEntitlement(
        userEntitlement.userId,
        cachedEntitlement.email,
        false
      )
    }
  }
}

/**
 * Apply a single entitlement through InstitutionsAPI. Does nothing in a dry
 * run. A failing call is logged and swallowed on purpose so that one bad
 * record does not abort the rest of the run.
 *
 * @param {string} userId
 * @param {string} email
 * @param {boolean} hasEntitlement - truthy adds the entitlement, falsy
 *   removes it
 * @returns {Promise<void>}
 */
async function syncUserEntitlement(userId, email, hasEntitlement) {
  if (!commit) {
    return
  }
  try {
    if (hasEntitlement) {
      await InstitutionsAPI.addEntitlement(userId, email)
    } else {
      await InstitutionsAPI.removeEntitlement(userId, email)
    }
  } catch (err) {
    console.error(
      `error setting entitlement: ${userId}, ${email}, ${hasEntitlement} - ${
        err.message
      }`
    )
  }
}

/**
 * Read the mongoexport file (one JSON document per line) into an object keyed
 * by user id. The `_id: { $oid }` wrapper of each document is replaced by a
 * plain `userId` string.
 *
 * @param {string} userEntitlementsFilename
 * @returns {Object} user documents keyed by user id
 * @throws if the file cannot be read or a non-blank line is not valid JSON
 */
function loadUserEntitlements(userEntitlementsFilename) {
  const userEntitlementsFile = fs.readFileSync(userEntitlementsFilename, {
    encoding: 'utf8'
  })

  const userEntitlements = {}

  // accept both LF and CRLF line endings
  for (const userEntitlementLine of userEntitlementsFile.split(/\r?\n/)) {
    if (!userEntitlementLine.trim()) {
      continue
    }
    const userEntitlementExport = JSON.parse(userEntitlementLine)
    const userId = userEntitlementExport._id.$oid
    delete userEntitlementExport._id
    userEntitlementExport.userId = userId
    userEntitlements[userId] = userEntitlementExport
  }

  return userEntitlements
}

/**
 * Read the postgres CSV export into an object keyed by email. Columns are, in
 * order: user_id, email, cached_entitlement ('t', 'f' or empty for NULL) and
 * university_id.
 *
 * @param {string} cachedEntitlementsFilename
 * @returns {Object} records `{ email, hasEntitlement, providerId, userId }`
 *   keyed by email; hasEntitlement is true, false, or null for a NULL column
 *   (false instead of null when --ignore-nulls is set)
 * @throws if the file cannot be read
 */
function loadCachedEntitlements(cachedEntitlementsFilename) {
  const cachedEntitlementsFile = fs.readFileSync(cachedEntitlementsFilename, {
    encoding: 'utf8'
  })

  const cachedEntitlements = {}

  // accept both LF and CRLF line endings so that the last column never keeps
  // a stray '\r'
  for (const cachedEntitlementLine of cachedEntitlementsFile.split(/\r?\n/)) {
    // skip blank lines (e.g. the trailing newline of the export) so that no
    // record ends up stored under the key 'undefined'
    if (!cachedEntitlementLine.trim()) {
      continue
    }
    // this is safe because comma is not an allowed value for any column
    const [
      userId,
      email,
      hasEntitlement,
      providerId
    ] = cachedEntitlementLine.split(',')

    let hasEntitlementBoolean
    if (hasEntitlement === 't') {
      hasEntitlementBoolean = true
    } else if (hasEntitlement === 'f' || ignoreNulls) {
      hasEntitlementBoolean = false
    } else {
      // NULL in postgres: kept distinct from false so it is reported as a
      // mismatch unless --ignore-nulls was passed
      hasEntitlementBoolean = null
    }

    cachedEntitlements[email] = {
      email,
      hasEntitlement: hasEntitlementBoolean,
      providerId,
      userId
    }
  }

  return cachedEntitlements
}