2020-08-19 05:11:39 -04:00
|
|
|
const { ReadPreference, ObjectId } = require('mongodb')
|
2020-10-07 09:17:49 -04:00
|
|
|
const { db, waitForDb } = require('../../app/src/infrastructure/mongodb')
|
2020-08-17 10:14:11 -04:00
|
|
|
|
|
|
|
// Maximum number of documents fetched per batch. Taken from the BATCH_SIZE
// environment variable; an unset, non-numeric or zero value falls back to 1000.
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 1000

// Optional lower bound for `_id`, taken from the BATCH_LAST_ID environment
// variable; left undefined when the variable is not set.
// Constructed with `new` so that it also works with driver versions in which
// ObjectId is an ES class (calling a class without `new` throws a TypeError).
const BATCH_LAST_ID = process.env.BATCH_LAST_ID
  ? new ObjectId(process.env.BATCH_LAST_ID)
  : undefined
|
2020-08-17 10:14:11 -04:00
|
|
|
|
2020-09-25 04:40:07 -04:00
|
|
|
/**
 * Fetch the next batch of documents from `collection`, sorted by ascending
 * `_id`.
 *
 * At most BATCH_SIZE documents are returned and the read is issued with the
 * SECONDARY read preference. When a lower bound is available (`maxId`, or
 * BATCH_LAST_ID as a fallback) only documents whose `_id` is greater than that
 * bound are matched; any `_id` condition already present in `query` is
 * replaced by the bound.
 *
 * @param {Object} collection - collection object exposing `find()`
 * @param {Object} query - filter to apply; this object is never modified
 * @param {*} [maxId] - exclusive lower bound for `_id`; falls back to
 *   BATCH_LAST_ID when falsy
 * @param {Object} projection - projection passed to the cursor
 * @returns {Promise<Array<Object>>} the documents of the batch (may be empty)
 */
async function getNextBatch(collection, query, maxId, projection) {
  const lowerBound = maxId || BATCH_LAST_ID

  // Build a fresh filter rather than writing `_id` onto the caller's object:
  // the caller may reuse `query` elsewhere and must not see our paging bound.
  const batchQuery = lowerBound
    ? { ...query, _id: { $gt: lowerBound } }
    : query

  return collection
    .find(batchQuery)
    .project(projection)
    .sort({ _id: 1 })
    .limit(BATCH_SIZE)
    .setReadPreference(ReadPreference.SECONDARY)
    .toArray()
}
|
|
|
|
|
|
|
|
/**
 * Apply `update` to every document of `nextBatch` with one `updateMany` call,
 * matching the documents by their `_id`.
 *
 * @param {Object} collection - collection object exposing `updateMany()`
 * @param {Array<Object>} nextBatch - documents to update; only `_id` is read
 * @param {Object} update - update specification handed to `updateMany()`
 * @returns {Promise<*>} whatever `updateMany()` resolves to
 */
async function performUpdate(collection, nextBatch, update) {
  const ids = nextBatch.map(({ _id }) => _id)
  const filter = { _id: { $in: ids } }
  return collection.updateMany(filter, update)
}
|
|
|
|
|
2020-09-25 04:40:07 -04:00
|
|
|
/**
 * Walk through all documents of a collection that match `query`, one batch at
 * a time in ascending `_id` order, and apply `update` to each batch.
 *
 * Batches are processed sequentially. The ids of each batch are printed with
 * `console.log` before the update runs and a completion line is printed with
 * `console.error` afterwards.
 *
 * @param {string} collectionName - key of the collection on the shared `db`
 * @param {Object} query - filter selecting the documents to process
 * @param {Object|Function} update - either an update specification that is
 *   applied through `performUpdate`, or a function called as
 *   `update(collection, batch)` and awaited
 * @param {Object} [projection] - fields to load per document; defaults to
 *   `{ _id: 1 }`
 * @returns {Promise<number>} total number of documents contained in the
 *   processed batches
 */
async function batchedUpdate(collectionName, query, update, projection) {
  await waitForDb()
  const collection = db[collectionName]
  const fields = projection || { _id: 1 }

  // Decide once how a batch gets updated; `update` never changes in the loop.
  const applyToBatch =
    typeof update === 'function'
      ? batch => update(collection, batch)
      : batch => performUpdate(collection, batch, update)

  let total = 0
  let lastId
  for (;;) {
    const batch = await getNextBatch(collection, query, lastId, fields)
    if (!batch.length) {
      break
    }

    // The last document carries the highest _id of the batch; the next fetch
    // resumes after it.
    lastId = batch[batch.length - 1]._id
    total += batch.length

    const ids = batch.map(({ _id }) => _id)
    console.log(`Running update on batch with ids ${JSON.stringify(ids)}`)

    await applyToBatch(batch)

    console.error(`Completed batch ending ${lastId}`)
  }
  return total
}
|
|
|
|
|
|
|
|
/**
 * Run `batchedUpdate` and terminate the process once it settles.
 *
 * On success the number of processed documents is printed with
 * `console.error` and the process exits with code 0; on failure the error is
 * printed and the process exits with code 1. Nothing is returned.
 *
 * @param {string} collection - collection name forwarded to `batchedUpdate`
 * @param {Object} query - filter forwarded to `batchedUpdate`
 * @param {Object|Function} update - update forwarded to `batchedUpdate`
 */
function batchedUpdateWithResultHandling(collection, query, update) {
  const reportSuccess = updated => {
    console.error({ updated })
    process.exit(0)
  }
  const reportFailure = error => {
    console.error({ error })
    process.exit(1)
  }

  batchedUpdate(collection, query, update)
    .then(reportSuccess)
    .catch(reportFailure)
}
|
|
|
|
|
|
|
|
module.exports = {
|
2020-10-21 05:48:30 -04:00
|
|
|
getNextBatch,
|
2020-08-17 10:14:11 -04:00
|
|
|
batchedUpdate,
|
|
|
|
batchedUpdateWithResultHandling
|
|
|
|
}
|