2020-02-17 12:34:21 -05:00
|
|
|
/* eslint-disable
    camelcase,
    no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
|
2020-02-17 12:34:04 -05:00
|
|
|
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS103: Rewrite code to no longer use __guard__
 * DS205: Consider reworking code to avoid use of IIFEs
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
|
2020-02-17 12:34:28 -05:00
|
|
|
// Maximum number of rows requested from the database query; only assigned
// when the first command-line argument is numeric.
let LIMIT
// Entries still to be processed, read from the input file or from the
// database query.
let pending
// Ids of the entry currently being handled.
let project_id
let doc_id
|
2020-09-29 07:23:18 -04:00
|
|
|
const { callbackify } = require('util')
|
2021-07-12 12:47:16 -04:00
|
|
|
const Settings = require('@overleaf/settings')
|
2020-02-17 12:34:28 -05:00
|
|
|
const async = require('async')
|
|
|
|
const _ = require('underscore')
|
2020-09-29 07:23:18 -04:00
|
|
|
const { db, ObjectId, waitForDb, closeDb } = require('./mongodb')
|
2020-02-17 12:34:28 -05:00
|
|
|
const fs = require('fs')
|
2020-11-25 06:57:20 -05:00
|
|
|
const Metrics = require('@overleaf/metrics')
|
2020-02-17 12:34:28 -05:00
|
|
|
Metrics.initialize('track-changes')
|
2021-12-14 08:00:35 -05:00
|
|
|
const logger = require('@overleaf/logger')
|
2020-02-17 12:34:28 -05:00
|
|
|
logger.initialize('track-changes-packworker')
|
2020-02-17 12:34:04 -05:00
|
|
|
// Turn on error reporting only when a Sentry DSN is present in the settings.
if (Settings.sentry != null && Settings.sentry.dsn != null) {
  logger.initializeErrorReporting(Settings.sentry.dsn)
}

// Number of milliseconds in one day.
const DAYS = 24 * 60 * 60 * 1000
|
2020-02-17 12:34:04 -05:00
|
|
|
|
2020-02-17 12:34:28 -05:00
|
|
|
const LockManager = require('./LockManager')
|
|
|
|
const PackManager = require('./PackManager')
|
2020-02-17 12:34:04 -05:00
|
|
|
|
|
|
|
// This worker script is forked by the main process to look for
// document histories which can be archived.
|
|
|
|
|
2020-02-17 12:34:28 -05:00
|
|
|
// Command-line arguments (all optional):
//   argv[2] - either a batch size (digits only) or the path of a file that
//             lists one "<project_id> <doc_id>" pair per line
//   argv[3] - delay in ms applied after each processed document (default 1000)
//   argv[4] - time in ms after which a shutdown is requested (default 30 min)
const source = process.argv[2]
const DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000
const TIMEOUT = Number(process.argv[4]) || 30 * 60 * 1000
let COUNT = 0 // number processed
let TOTAL = 0 // total number to process

if (source != null && !/^[0-9]+$/.test(source)) {
  // File mode: read the pairs from the file and keep only the lines whose
  // doc_id is a 24-character lowercase hex string (this also drops blank
  // lines and lines without a second field).
  pending = fs
    .readFileSync(source)
    .toString()
    .split('\n')
    .map(line => {
      const [projectId, docId] = line.split(' ')
      return { doc_id: docId, project_id: projectId }
    })
    .filter(row => row.doc_id != null && /^[a-f0-9]{24}$/.test(row.doc_id))
} else {
  // Database mode: the argument is a batch size. Fall back to 1000 when it is
  // zero or missing; a missing argument used to crash on `source.match`.
  LIMIT = Number(source) || 1000
}
|
|
|
|
|
2020-02-17 12:34:28 -05:00
|
|
|
// Becomes true once the soft timeout below has fired.
let shutDownRequested = false

// Soft timeout: after TIMEOUT ms, flag that a shutdown is wanted and arm a
// second timer that terminates the process unconditionally.
const shutDownTimer = setTimeout(() => {
  logger.debug('pack timed out, requesting shutdown')
  // start the shutdown on the next pack
  shutDownRequested = true
  // do a hard shutdown after a further 5 minutes; the timer is unref'd so it
  // does not by itself keep the process running
  setTimeout(() => {
    logger.error('HARD TIMEOUT in pack archive worker')
    process.exit()
  }, 5 * 60 * 1000).unref()
}, TIMEOUT)

logger.debug(
  `checking for updates, limit=${LIMIT}, delay=${DOCUMENT_PACK_DELAY}, timeout=${TIMEOUT}`
)
|
2020-02-17 12:34:04 -05:00
|
|
|
|
2020-06-04 04:24:21 -04:00
|
|
|
/**
 * Shut the worker down: cancel the soft-timeout timer, close the database
 * connection, then close the LockManager connection. Once both are closed a
 * 5 second unref'd timer is armed that exits with code 1 if the process is
 * still running at that point.
 *
 * Any error reported by closeDb is not inspected.
 */
const finish = function () {
  if (shutDownTimer != null) {
    logger.debug('cancelling timeout')
    clearTimeout(shutDownTimer)
  }

  // Last step, run after the LockManager connection has been closed.
  const onLockManagerClosed = function () {
    logger.debug(
      { processedCount: COUNT, allCount: TOTAL },
      'ready to exit from pack archive worker'
    )
    const forceExit = function () {
      logger.error('hard exit from pack archive worker')
      return process.exit(1)
    }
    // unref'd: the timer only fires if something else keeps the process alive
    return setTimeout(forceExit, 5 * 1000).unref()
  }

  logger.debug('closing db')
  const closeDbWithCallback = callbackify(closeDb)
  closeDbWithCallback(function () {
    logger.debug('closing LockManager Redis Connection')
    return LockManager.close(onLockManagerClosed)
  })
}
|
2020-02-17 12:34:04 -05:00
|
|
|
|
2022-05-16 08:38:18 -04:00
|
|
|
process.on('exit', code => logger.debug({ code }, 'pack archive worker exited'))
|
2020-02-17 12:34:04 -05:00
|
|
|
|
2021-07-13 07:04:43 -04:00
|
|
|
/**
 * Process the given entries one at a time with async.eachSeries and call
 * finish() when the series ends, whether it completed or stopped early.
 *
 * Each entry is an object with project_id and doc_id and optionally _id.
 * Entries without an _id are handed to PackManager.pushOldPacks, the others
 * to PackManager.processOldPack. After each successful entry the next one is
 * started DOCUMENT_PACK_DELAY ms later.
 *
 * @param {Array<Object>} pending - entries to process
 */
const processUpdates = pending => {
  // Handle a single entry and call `next` when it is done.
  const processEntry = function (entry, next) {
    const { _id } = entry
    // project_id and doc_id are the module-level variables
    project_id = entry.project_id
    doc_id = entry.doc_id
    COUNT++
    logger.debug({ project_id, doc_id }, `processing ${COUNT}/${TOTAL}`)

    if (project_id == null || doc_id == null) {
      logger.debug(
        { project_id, doc_id },
        'skipping pack, missing project/doc id'
      )
      return next()
    }

    // Callback given to PackManager for this entry.
    const onPackDone = function (err, result) {
      const isRetryableS3Error =
        err != null && err.code === 'InternalError' && err.retryable
      if (isRetryableS3Error) {
        logger.warn(
          { err, result },
          'ignoring S3 error in pack archive worker'
        )
        // Ignore any s3 errors due to random problems
        err = null
      }
      if (err != null) {
        logger.error({ err, result }, 'error in pack archive worker')
        return next(err)
      }
      if (shutDownRequested) {
        // the 'shutdown' error stops the series; it is not logged as a failure
        logger.warn('shutting down pack archive worker')
        return next(new Error('shutdown'))
      }
      return setTimeout(() => next(err, result), DOCUMENT_PACK_DELAY)
    }

    if (_id != null) {
      return PackManager.processOldPack(project_id, doc_id, _id, onPackDone)
    }
    return PackManager.pushOldPacks(project_id, doc_id, onPackDone)
  }

  // Runs once the series has ended.
  const onSeriesEnd = function (err, results) {
    const isUnexpected = err != null && err.message !== 'shutdown'
    if (isUnexpected) {
      logger.error({ err }, 'error in pack archive worker processUpdates')
    }
    return finish()
  }

  return async.eachSeries(pending, processEntry, onSeriesEnd)
}
|
2020-02-17 12:34:04 -05:00
|
|
|
// find the packs which can be archived
|
|
|
|
|
2020-06-04 04:24:21 -04:00
|
|
|
/**
 * Build an ObjectId from a date: the date's timestamp in whole seconds is
 * written as 8 hex characters and followed by 16 zero characters. It is used
 * as an upper bound when querying by _id.
 *
 * @param {Date} date
 * @returns {ObjectId}
 */
const ObjectIdFromDate = function (date) {
  const seconds = Math.floor(date.getTime() / 1000)
  // Left-pad the timestamp to 8 hex characters so the id is always 24
  // characters long, even for small timestamps.
  const id = seconds.toString(16).padStart(8, '0') + '0000000000000000'
  // Use `new` so this also works when ObjectId is a class.
  return new ObjectId(id)
}
|
2020-02-17 12:34:04 -05:00
|
|
|
|
|
|
|
// New approach, in two passes:
// first find the packs to be marked as finalised:true (those which have a newer pack present),
// then only consider finalised:true packs for archiving.
|
|
|
|
|
2020-09-29 07:22:22 -04:00
|
|
|
// Entry point: once waitForDb() resolves, process the entries read from the
// input file if there are any, otherwise query the database for candidates.
waitForDb()
  .then(() => {
    if (pending != null) {
      // Record the total so the progress logs and the final summary report
      // it; it used to stay at 0 when the entries came from a file.
      TOTAL = pending.length
      logger.debug(`got ${pending.length} entries from ${source}`)
      processUpdates(pending)
    } else {
      processFromOneWeekAgo()
    }
  })
  .catch(err => {
    logger.fatal({ err }, 'cannot connect to mongo, exiting')
    process.exit(1)
  })
|
|
|
|
|
|
|
|
/**
 * Query db.docHistory for at most LIMIT entries that have project_id and
 * v_end, have no expiresAt, have an _id below the ObjectId built from the
 * date one week ago, and whose last_checked is more than one week old. The
 * entries are sorted by ascending last_checked.
 *
 * The results are reduced to one entry per doc_id, stored in `pending`, and
 * passed to processUpdates. If the query fails, the error is logged and
 * finish() is called instead.
 */
function processFromOneWeekAgo() {
  const oneWeekAgo = new Date(Date.now() - 7 * DAYS)

  const query = {
    expiresAt: { $exists: false },
    project_id: { $exists: true },
    v_end: { $exists: true },
    _id: { $lt: ObjectIdFromDate(oneWeekAgo) },
    last_checked: { $lt: oneWeekAgo },
  }
  const options = { projection: { _id: 1, doc_id: 1, project_id: 1 } }

  const onResults = function (err, results) {
    if (err != null) {
      logger.debug({ err }, 'error checking for updates')
      finish()
      return
    }
    // keep a single entry per document
    pending = _.uniq(results, false, result => result.doc_id.toString())
    TOTAL = pending.length
    logger.debug(`found ${TOTAL} documents to archive`)
    return processUpdates(pending)
  }

  db.docHistory
    .find(query, options)
    .sort({ last_checked: 1 })
    .limit(LIMIT)
    .toArray(onResults)
}
|
|
|
|
|
|
|
|
/**
 * Apply `transform` to `value` unless `value` is null or undefined.
 *
 * @param {*} value
 * @param {Function} transform - called with `value` when it is not nullish
 * @returns {*} the result of `transform(value)`, or undefined when `value`
 *   is null or undefined
 */
function __guard__(value, transform) {
  if (value === undefined || value === null) {
    return undefined
  }
  return transform(value)
}
|