2021-07-12 16:47:15 +00:00
|
|
|
const Settings = require('@overleaf/settings')
|
2020-11-10 11:32:04 +00:00
|
|
|
const rclient = require('@overleaf/redis-wrapper').createClient(
|
2020-05-06 10:09:33 +00:00
|
|
|
Settings.redis.documentupdater
|
|
|
|
)
|
2021-10-06 09:10:28 +00:00
|
|
|
const logger = require('@overleaf/logger')
|
2024-05-29 13:37:27 +00:00
|
|
|
const OError = require('@overleaf/o-error')
|
2024-01-30 15:35:54 +00:00
|
|
|
const { promisifyAll } = require('@overleaf/promise-utils')
|
2020-05-06 10:09:33 +00:00
|
|
|
const metrics = require('./Metrics')
|
|
|
|
const Errors = require('./Errors')
|
2024-11-08 10:21:56 +00:00
|
|
|
const crypto = require('node:crypto')
|
2020-05-06 10:09:33 +00:00
|
|
|
const async = require('async')
|
2021-09-20 08:23:37 +00:00
|
|
|
const { docIsTooLarge } = require('./Limits')
|
2020-05-06 10:08:21 +00:00
|
|
|
|
|
|
|
// Redis calls occasionally take far longer than expected. They run while we
// hold a lock that expires after 30 seconds, so they have to be quick: errors
// elsewhere in the stack must not hold us up, and we need to bail out fast
// when something goes wrong.
const MAX_REDIS_REQUEST_LENGTH = 5 * 1000 // 5 seconds
const PROJECT_BLOCK_TTL_SECS = 30

// Make times easy to read
const minutes = 60 // seconds for Redis expire

// Optional setting: when truthy, getDoc logs hash mismatches on read.
const logHashReadErrors = Settings.documentupdater?.logHashErrors?.read

const MEGABYTES = 1024 * 1024
// Upper bound on the length of the JSON-serialised ranges
const MAX_RANGES_SIZE = 3 * MEGABYTES

// Functions that build the Redis key names used throughout this module
const { key_schema: keys } = Settings.redis.documentupdater
|
|
|
|
|
2024-01-30 15:35:54 +00:00
|
|
|
const RedisManager = {
|
2020-05-06 10:09:33 +00:00
|
|
|
rclient,
|
|
|
|
|
2023-09-05 21:07:11 +00:00
|
|
|
putDocInMemory(
|
|
|
|
projectId,
|
|
|
|
docId,
|
|
|
|
docLines,
|
|
|
|
version,
|
|
|
|
ranges,
|
2024-05-28 11:24:06 +00:00
|
|
|
resolvedCommentIds,
|
2023-09-05 21:07:11 +00:00
|
|
|
pathname,
|
|
|
|
projectHistoryId,
|
2024-02-09 14:09:40 +00:00
|
|
|
historyRangesSupport,
|
2023-09-05 21:07:11 +00:00
|
|
|
_callback
|
|
|
|
) {
|
2020-05-06 10:09:33 +00:00
|
|
|
const timer = new metrics.Timer('redis.put-doc')
|
2021-11-30 13:26:20 +00:00
|
|
|
const callback = error => {
|
2020-05-06 10:09:33 +00:00
|
|
|
timer.done()
|
2021-11-30 13:26:20 +00:00
|
|
|
_callback(error)
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
2023-09-05 21:07:11 +00:00
|
|
|
const docLinesArray = docLines
|
|
|
|
docLines = JSON.stringify(docLines)
|
|
|
|
if (docLines.indexOf('\u0000') !== -1) {
|
2020-05-06 10:09:33 +00:00
|
|
|
const error = new Error('null bytes found in doc lines')
|
|
|
|
// this check was added to catch memory corruption in JSON.stringify.
|
|
|
|
// It sometimes returned null bytes at the end of the string.
|
2023-09-05 21:07:11 +00:00
|
|
|
logger.error({ err: error, docId, docLines }, error.message)
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-09-20 08:23:37 +00:00
|
|
|
// Do an optimised size check on the docLines using the serialised
|
|
|
|
// length as an upper bound
|
2023-09-05 21:07:11 +00:00
|
|
|
const sizeBound = docLines.length
|
|
|
|
if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) {
|
|
|
|
const docSize = docLines.length
|
2021-05-06 16:19:23 +00:00
|
|
|
const err = new Error('blocking doc insert into redis: doc is too large')
|
2021-11-30 13:26:20 +00:00
|
|
|
logger.error({ projectId, docId, err, docSize }, err.message)
|
2021-05-06 16:19:23 +00:00
|
|
|
return callback(err)
|
|
|
|
}
|
2023-09-05 21:07:11 +00:00
|
|
|
const docHash = RedisManager._computeHash(docLines)
|
2020-05-06 10:09:33 +00:00
|
|
|
// record bytes sent to redis
|
2023-09-05 21:07:11 +00:00
|
|
|
metrics.summary('redis.docLines', docLines.length, { status: 'set' })
|
2021-09-30 08:28:32 +00:00
|
|
|
logger.debug(
|
2023-09-05 21:07:11 +00:00
|
|
|
{ projectId, docId, version, docHash, pathname, projectHistoryId },
|
2020-05-06 10:09:33 +00:00
|
|
|
'putting doc in redis'
|
|
|
|
)
|
2023-09-05 21:07:11 +00:00
|
|
|
RedisManager._serializeRanges(ranges, (error, ranges) => {
|
2021-11-30 13:26:20 +00:00
|
|
|
if (error) {
|
|
|
|
logger.error({ err: error, docId, projectId }, error.message)
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-11-22 10:08:43 +00:00
|
|
|
|
2024-07-02 13:55:35 +00:00
|
|
|
// update docsInProject set before writing doc contents
|
|
|
|
const multi = rclient.multi()
|
|
|
|
multi.exists(keys.projectBlock({ project_id: projectId }))
|
|
|
|
multi.sadd(keys.docsInProject({ project_id: projectId }), docId)
|
|
|
|
multi.exec((err, reply) => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const projectBlocked = reply[0] === 1
|
|
|
|
if (projectBlocked) {
|
|
|
|
// We don't clean up the spurious docId added in the docsInProject
|
|
|
|
// set. There is a risk that the docId was successfully added by a
|
|
|
|
// concurrent process. This set is used when unloading projects. An
|
|
|
|
// extra docId will not prevent the project from being uploaded, but
|
|
|
|
// a missing docId means that the doc might stay in Redis forever.
|
|
|
|
return callback(
|
|
|
|
new OError('Project blocked from loading docs', { projectId })
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
RedisManager.setHistoryRangesSupportFlag(
|
|
|
|
docId,
|
|
|
|
historyRangesSupport,
|
|
|
|
err => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!pathname) {
|
|
|
|
metrics.inc('pathname', 1, {
|
|
|
|
path: 'RedisManager.setDoc',
|
|
|
|
status: pathname === '' ? 'zero-length' : 'undefined',
|
2024-05-29 15:50:34 +00:00
|
|
|
})
|
2024-07-02 13:55:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure that this MULTI operation only operates on doc
|
|
|
|
// specific keys, i.e. keys that have the doc id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
|
|
|
const multi = rclient.multi()
|
|
|
|
multi.mset({
|
|
|
|
[keys.docLines({ doc_id: docId })]: docLines,
|
|
|
|
[keys.projectKey({ doc_id: docId })]: projectId,
|
|
|
|
[keys.docVersion({ doc_id: docId })]: version,
|
|
|
|
[keys.docHash({ doc_id: docId })]: docHash,
|
|
|
|
[keys.ranges({ doc_id: docId })]: ranges,
|
|
|
|
[keys.pathname({ doc_id: docId })]: pathname,
|
|
|
|
[keys.projectHistoryId({ doc_id: docId })]: projectHistoryId,
|
|
|
|
})
|
|
|
|
if (historyRangesSupport) {
|
|
|
|
multi.del(keys.resolvedCommentIds({ doc_id: docId }))
|
|
|
|
if (resolvedCommentIds.length > 0) {
|
|
|
|
multi.sadd(
|
|
|
|
keys.resolvedCommentIds({ doc_id: docId }),
|
|
|
|
...resolvedCommentIds
|
|
|
|
)
|
2024-05-29 15:50:34 +00:00
|
|
|
}
|
2024-05-29 13:37:27 +00:00
|
|
|
}
|
2024-07-02 13:55:35 +00:00
|
|
|
multi.exec(err => {
|
|
|
|
if (err) {
|
|
|
|
callback(
|
|
|
|
OError.tag(err, 'failed to write doc to Redis in MULTI', {
|
|
|
|
previousErrors: err.previousErrors.map(e => ({
|
|
|
|
name: e.name,
|
|
|
|
message: e.message,
|
|
|
|
command: e.command,
|
|
|
|
})),
|
|
|
|
})
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
callback()
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
)
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
removeDocFromMemory(projectId, docId, _callback) {
|
|
|
|
logger.debug({ projectId, docId }, 'removing doc from redis')
|
|
|
|
const callback = err => {
|
|
|
|
if (err) {
|
|
|
|
logger.err({ projectId, docId, err }, 'error removing doc from redis')
|
|
|
|
_callback(err)
|
2020-05-06 10:09:33 +00:00
|
|
|
} else {
|
2021-11-30 13:26:20 +00:00
|
|
|
logger.debug({ projectId, docId }, 'removed doc from redis')
|
|
|
|
_callback()
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-29 15:50:34 +00:00
|
|
|
// Make sure that this MULTI operation only operates on doc
|
|
|
|
// specific keys, i.e. keys that have the doc id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
2020-05-06 10:09:33 +00:00
|
|
|
let multi = rclient.multi()
|
2021-11-30 13:26:20 +00:00
|
|
|
multi.strlen(keys.docLines({ doc_id: docId }))
|
2021-03-28 11:30:51 +00:00
|
|
|
multi.del(
|
2021-11-30 13:26:20 +00:00
|
|
|
keys.docLines({ doc_id: docId }),
|
|
|
|
keys.projectKey({ doc_id: docId }),
|
|
|
|
keys.docVersion({ doc_id: docId }),
|
|
|
|
keys.docHash({ doc_id: docId }),
|
|
|
|
keys.ranges({ doc_id: docId }),
|
|
|
|
keys.pathname({ doc_id: docId }),
|
|
|
|
keys.projectHistoryId({ doc_id: docId }),
|
|
|
|
keys.unflushedTime({ doc_id: docId }),
|
|
|
|
keys.lastUpdatedAt({ doc_id: docId }),
|
2024-05-28 11:24:06 +00:00
|
|
|
keys.lastUpdatedBy({ doc_id: docId }),
|
|
|
|
keys.resolvedCommentIds({ doc_id: docId })
|
2021-03-28 11:30:51 +00:00
|
|
|
)
|
2021-11-30 13:26:20 +00:00
|
|
|
multi.exec((error, response) => {
|
|
|
|
if (error) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
const length = response?.[0]
|
2020-05-06 10:09:33 +00:00
|
|
|
if (length > 0) {
|
|
|
|
// record bytes freed in redis
|
|
|
|
metrics.summary('redis.docLines', length, { status: 'del' })
|
|
|
|
}
|
2024-05-29 15:50:34 +00:00
|
|
|
|
|
|
|
// Make sure that this MULTI operation only operates on project
|
|
|
|
// specific keys, i.e. keys that have the project id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
2020-05-06 10:09:33 +00:00
|
|
|
multi = rclient.multi()
|
2021-11-30 13:26:20 +00:00
|
|
|
multi.srem(keys.docsInProject({ project_id: projectId }), docId)
|
|
|
|
multi.del(keys.projectState({ project_id: projectId }))
|
2024-02-09 14:09:40 +00:00
|
|
|
multi.exec(err => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
rclient.srem(keys.historyRangesSupport(), docId, callback)
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
checkOrSetProjectState(projectId, newState, callback) {
|
2024-05-29 15:50:34 +00:00
|
|
|
// Make sure that this MULTI operation only operates on project
|
|
|
|
// specific keys, i.e. keys that have the project id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
2020-05-06 10:09:33 +00:00
|
|
|
const multi = rclient.multi()
|
2021-11-30 13:26:20 +00:00
|
|
|
multi.getset(keys.projectState({ project_id: projectId }), newState)
|
|
|
|
multi.expire(keys.projectState({ project_id: projectId }), 30 * minutes)
|
|
|
|
multi.exec((error, response) => {
|
|
|
|
if (error) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-09-30 08:28:32 +00:00
|
|
|
logger.debug(
|
2021-11-30 13:26:20 +00:00
|
|
|
{ projectId, newState, oldState: response[0] },
|
2020-05-06 10:09:33 +00:00
|
|
|
'checking project state'
|
|
|
|
)
|
2021-11-30 13:26:20 +00:00
|
|
|
callback(null, response[0] !== newState)
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
clearProjectState(projectId, callback) {
|
|
|
|
rclient.del(keys.projectState({ project_id: projectId }), callback)
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
getDoc(projectId, docId, callback) {
|
2020-05-06 10:09:33 +00:00
|
|
|
const timer = new metrics.Timer('redis.get-doc')
|
2021-03-28 11:30:51 +00:00
|
|
|
const collectKeys = [
|
2021-11-30 13:26:20 +00:00
|
|
|
keys.docLines({ doc_id: docId }),
|
|
|
|
keys.docVersion({ doc_id: docId }),
|
|
|
|
keys.docHash({ doc_id: docId }),
|
|
|
|
keys.projectKey({ doc_id: docId }),
|
|
|
|
keys.ranges({ doc_id: docId }),
|
|
|
|
keys.pathname({ doc_id: docId }),
|
|
|
|
keys.projectHistoryId({ doc_id: docId }),
|
|
|
|
keys.unflushedTime({ doc_id: docId }),
|
|
|
|
keys.lastUpdatedAt({ doc_id: docId }),
|
|
|
|
keys.lastUpdatedBy({ doc_id: docId }),
|
2021-03-28 11:30:51 +00:00
|
|
|
]
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.mget(...collectKeys, (error, result) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2020-05-06 10:09:33 +00:00
|
|
|
let [
|
|
|
|
docLines,
|
|
|
|
version,
|
|
|
|
storedHash,
|
2021-11-30 13:26:20 +00:00
|
|
|
docProjectId,
|
2020-05-06 10:09:33 +00:00
|
|
|
ranges,
|
|
|
|
pathname,
|
|
|
|
projectHistoryId,
|
|
|
|
unflushedTime,
|
|
|
|
lastUpdatedAt,
|
2021-07-13 11:04:42 +00:00
|
|
|
lastUpdatedBy,
|
2021-11-30 13:26:20 +00:00
|
|
|
] = result
|
2024-02-09 14:09:40 +00:00
|
|
|
rclient.sismember(keys.historyRangesSupport(), docId, (error, result) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2024-05-28 11:24:06 +00:00
|
|
|
rclient.smembers(
|
|
|
|
keys.resolvedCommentIds({ doc_id: docId }),
|
|
|
|
(error, resolvedCommentIds) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
2020-05-06 10:09:33 +00:00
|
|
|
|
2024-05-28 11:24:06 +00:00
|
|
|
const historyRangesSupport = result === 1
|
2020-05-06 10:09:33 +00:00
|
|
|
|
2024-05-28 11:24:06 +00:00
|
|
|
const timeSpan = timer.done()
|
|
|
|
// check if request took too long and bail out. only do this for
|
|
|
|
// get, because it is the first call in each update, so if this
|
|
|
|
// passes we'll assume others have a reasonable chance to succeed.
|
|
|
|
if (timeSpan > MAX_REDIS_REQUEST_LENGTH) {
|
|
|
|
error = new Error('redis getDoc exceeded timeout')
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
// record bytes loaded from redis
|
|
|
|
if (docLines != null) {
|
|
|
|
metrics.summary('redis.docLines', docLines.length, {
|
|
|
|
status: 'get',
|
|
|
|
})
|
|
|
|
}
|
|
|
|
// check sha1 hash value if present
|
|
|
|
if (docLines != null && storedHash != null) {
|
|
|
|
const computedHash = RedisManager._computeHash(docLines)
|
|
|
|
if (logHashReadErrors && computedHash !== storedHash) {
|
|
|
|
logger.error(
|
|
|
|
{
|
|
|
|
projectId,
|
|
|
|
docId,
|
|
|
|
docProjectId,
|
|
|
|
computedHash,
|
|
|
|
storedHash,
|
|
|
|
docLines,
|
|
|
|
},
|
|
|
|
'hash mismatch on retrieved document'
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2020-05-06 10:09:33 +00:00
|
|
|
|
2024-05-28 11:24:06 +00:00
|
|
|
try {
|
|
|
|
docLines = JSON.parse(docLines)
|
|
|
|
ranges = RedisManager._deserializeRanges(ranges)
|
|
|
|
} catch (e) {
|
|
|
|
return callback(e)
|
|
|
|
}
|
|
|
|
|
|
|
|
version = parseInt(version || 0, 10)
|
|
|
|
// check doc is in requested project
|
|
|
|
if (docProjectId != null && docProjectId !== projectId) {
|
|
|
|
logger.error(
|
|
|
|
{ projectId, docId, docProjectId },
|
|
|
|
'doc not in project'
|
|
|
|
)
|
|
|
|
return callback(new Errors.NotFoundError('document not found'))
|
|
|
|
}
|
|
|
|
|
|
|
|
if (docLines && version && !pathname) {
|
|
|
|
metrics.inc('pathname', 1, {
|
|
|
|
path: 'RedisManager.getDoc',
|
|
|
|
status: pathname === '' ? 'zero-length' : 'undefined',
|
|
|
|
})
|
|
|
|
}
|
2021-11-22 10:08:43 +00:00
|
|
|
|
2024-05-28 11:24:06 +00:00
|
|
|
callback(
|
|
|
|
null,
|
|
|
|
docLines,
|
|
|
|
version,
|
|
|
|
ranges,
|
|
|
|
pathname,
|
|
|
|
projectHistoryId,
|
|
|
|
unflushedTime,
|
|
|
|
lastUpdatedAt,
|
|
|
|
lastUpdatedBy,
|
|
|
|
historyRangesSupport,
|
|
|
|
resolvedCommentIds
|
|
|
|
)
|
|
|
|
}
|
2024-02-09 14:09:40 +00:00
|
|
|
)
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
getDocVersion(docId, callback) {
|
2023-06-06 10:19:55 +00:00
|
|
|
rclient.mget(keys.docVersion({ doc_id: docId }), (error, result) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
2023-06-06 10:19:55 +00:00
|
|
|
let [version] = result || []
|
|
|
|
version = parseInt(version, 10)
|
|
|
|
callback(null, version)
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
getDocLines(docId, callback) {
|
|
|
|
rclient.get(keys.docLines({ doc_id: docId }), (error, docLines) => {
|
|
|
|
if (error) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
callback(null, docLines)
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
getPreviousDocOps(docId, start, end, callback) {
|
2020-05-06 10:09:33 +00:00
|
|
|
const timer = new metrics.Timer('redis.get-prev-docops')
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.llen(keys.docOps({ doc_id: docId }), (error, length) => {
|
|
|
|
if (error) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.get(keys.docVersion({ doc_id: docId }), (error, version) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
version = parseInt(version, 10)
|
|
|
|
const firstVersionInRedis = version - length
|
2021-07-13 11:04:42 +00:00
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
if (start < firstVersionInRedis || end > version) {
|
|
|
|
error = new Errors.OpRangeNotAvailableError(
|
2024-07-18 13:01:09 +00:00
|
|
|
'doc ops range is not loaded in redis',
|
|
|
|
{ firstVersionInRedis, version, ttlInS: RedisManager.DOC_OPS_TTL }
|
2021-11-30 13:26:20 +00:00
|
|
|
)
|
|
|
|
logger.debug(
|
|
|
|
{ err: error, docId, length, version, start, end },
|
|
|
|
'doc ops range is not loaded in redis'
|
|
|
|
)
|
|
|
|
return callback(error)
|
|
|
|
}
|
2021-07-13 11:04:42 +00:00
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
start = start - firstVersionInRedis
|
|
|
|
if (end > -1) {
|
|
|
|
end = end - firstVersionInRedis
|
|
|
|
}
|
2021-07-13 11:04:42 +00:00
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
if (isNaN(start) || isNaN(end)) {
|
|
|
|
error = new Error('inconsistent version or lengths')
|
|
|
|
logger.error(
|
|
|
|
{ err: error, docId, length, version, start, end },
|
|
|
|
'inconsistent version or length'
|
2021-07-13 11:04:42 +00:00
|
|
|
)
|
2021-11-30 13:26:20 +00:00
|
|
|
return callback(error)
|
2021-07-13 11:04:42 +00:00
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
|
|
|
|
rclient.lrange(
|
|
|
|
keys.docOps({ doc_id: docId }),
|
|
|
|
start,
|
|
|
|
end,
|
|
|
|
(error, jsonOps) => {
|
|
|
|
let ops
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
ops = jsonOps.map(jsonOp => JSON.parse(jsonOp))
|
|
|
|
} catch (e) {
|
|
|
|
return callback(e)
|
|
|
|
}
|
|
|
|
const timeSpan = timer.done()
|
|
|
|
if (timeSpan > MAX_REDIS_REQUEST_LENGTH) {
|
|
|
|
error = new Error('redis getPreviousDocOps exceeded timeout')
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
callback(null, ops)
|
|
|
|
}
|
|
|
|
)
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
},
|
|
|
|
|
|
|
|
DOC_OPS_TTL: 60 * minutes,
|
|
|
|
DOC_OPS_MAX_LENGTH: 100,
|
|
|
|
updateDocument(
|
2021-11-30 13:26:20 +00:00
|
|
|
projectId,
|
|
|
|
docId,
|
2020-05-06 10:09:33 +00:00
|
|
|
docLines,
|
|
|
|
newVersion,
|
|
|
|
appliedOps,
|
|
|
|
ranges,
|
|
|
|
updateMeta,
|
|
|
|
callback
|
|
|
|
) {
|
|
|
|
if (appliedOps == null) {
|
|
|
|
appliedOps = []
|
|
|
|
}
|
2023-06-06 10:19:55 +00:00
|
|
|
RedisManager.getDocVersion(docId, (error, currentVersion) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
if (currentVersion + appliedOps.length !== newVersion) {
|
|
|
|
error = new Error(`Version mismatch. '${docId}' is corrupted.`)
|
|
|
|
logger.error(
|
|
|
|
{
|
|
|
|
err: error,
|
|
|
|
docId,
|
|
|
|
currentVersion,
|
|
|
|
newVersion,
|
|
|
|
opsLength: appliedOps.length,
|
|
|
|
},
|
|
|
|
'version mismatch'
|
|
|
|
)
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
|
|
|
|
const jsonOps = appliedOps.map(op => JSON.stringify(op))
|
|
|
|
for (const op of jsonOps) {
|
|
|
|
if (op.indexOf('\u0000') !== -1) {
|
|
|
|
error = new Error('null bytes found in jsonOps')
|
|
|
|
// this check was added to catch memory corruption in JSON.stringify
|
|
|
|
logger.error({ err: error, docId, jsonOps }, error.message)
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2023-06-06 10:19:55 +00:00
|
|
|
}
|
2021-07-13 11:04:42 +00:00
|
|
|
|
2023-06-06 10:19:55 +00:00
|
|
|
const newDocLines = JSON.stringify(docLines)
|
|
|
|
if (newDocLines.indexOf('\u0000') !== -1) {
|
|
|
|
error = new Error('null bytes found in doc lines')
|
|
|
|
// this check was added to catch memory corruption in JSON.stringify
|
|
|
|
logger.error({ err: error, docId, newDocLines }, error.message)
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
// Do an optimised size check on the docLines using the serialised
|
|
|
|
// length as an upper bound
|
|
|
|
const sizeBound = newDocLines.length
|
|
|
|
if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) {
|
|
|
|
const err = new Error('blocking doc update: doc is too large')
|
|
|
|
const docSize = newDocLines.length
|
|
|
|
logger.error({ projectId, docId, err, docSize }, err.message)
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const newHash = RedisManager._computeHash(newDocLines)
|
2021-07-13 11:04:42 +00:00
|
|
|
|
2023-06-06 10:19:55 +00:00
|
|
|
const opVersions = appliedOps.map(op => op?.v)
|
|
|
|
logger.debug(
|
|
|
|
{
|
|
|
|
docId,
|
|
|
|
version: newVersion,
|
|
|
|
hash: newHash,
|
|
|
|
opVersions,
|
|
|
|
},
|
|
|
|
'updating doc in redis'
|
|
|
|
)
|
|
|
|
// record bytes sent to redis in update
|
|
|
|
metrics.summary('redis.docLines', newDocLines.length, {
|
|
|
|
status: 'update',
|
|
|
|
})
|
|
|
|
RedisManager._serializeRanges(ranges, (error, ranges) => {
|
|
|
|
if (error) {
|
|
|
|
logger.error({ err: error, docId }, error.message)
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2023-06-06 10:19:55 +00:00
|
|
|
if (ranges && ranges.indexOf('\u0000') !== -1) {
|
|
|
|
error = new Error('null bytes found in ranges')
|
|
|
|
// this check was added to catch memory corruption in JSON.stringify
|
|
|
|
logger.error({ err: error, docId, ranges }, error.message)
|
|
|
|
return callback(error)
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
2024-05-29 15:50:34 +00:00
|
|
|
|
|
|
|
// Make sure that this MULTI operation only operates on doc
|
|
|
|
// specific keys, i.e. keys that have the doc id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
2023-06-06 10:19:55 +00:00
|
|
|
const multi = rclient.multi()
|
|
|
|
multi.mset({
|
|
|
|
[keys.docLines({ doc_id: docId })]: newDocLines,
|
|
|
|
[keys.docVersion({ doc_id: docId })]: newVersion,
|
|
|
|
[keys.docHash({ doc_id: docId })]: newHash,
|
|
|
|
[keys.ranges({ doc_id: docId })]: ranges,
|
|
|
|
[keys.lastUpdatedAt({ doc_id: docId })]: Date.now(),
|
|
|
|
[keys.lastUpdatedBy({ doc_id: docId })]:
|
|
|
|
updateMeta && updateMeta.user_id,
|
2021-07-13 11:04:42 +00:00
|
|
|
})
|
2023-06-06 10:19:55 +00:00
|
|
|
multi.ltrim(
|
|
|
|
keys.docOps({ doc_id: docId }),
|
|
|
|
-RedisManager.DOC_OPS_MAX_LENGTH,
|
|
|
|
-1
|
|
|
|
) // index 3
|
|
|
|
// push the ops last so we can get the lengths at fixed index position 7
|
|
|
|
if (jsonOps.length > 0) {
|
|
|
|
multi.rpush(keys.docOps({ doc_id: docId }), ...jsonOps) // index 5
|
|
|
|
// expire must come after rpush since before it will be a no-op if the list is empty
|
|
|
|
multi.expire(keys.docOps({ doc_id: docId }), RedisManager.DOC_OPS_TTL) // index 6
|
|
|
|
}
|
2023-09-06 14:23:23 +00:00
|
|
|
// Set the unflushed timestamp to the current time if not set ("NX" flag).
|
|
|
|
multi.set(keys.unflushedTime({ doc_id: docId }), Date.now(), 'NX')
|
2023-06-06 10:19:55 +00:00
|
|
|
multi.exec((error, result) => {
|
2021-11-30 13:26:20 +00:00
|
|
|
if (error) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(error)
|
|
|
|
}
|
2024-02-13 13:15:43 +00:00
|
|
|
callback()
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
2023-06-06 10:19:55 +00:00
|
|
|
})
|
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
renameDoc(projectId, docId, userId, update, projectHistoryId, callback) {
|
|
|
|
RedisManager.getDoc(projectId, docId, (error, lines, version) => {
|
|
|
|
if (error) {
|
|
|
|
return callback(error)
|
|
|
|
}
|
|
|
|
if (lines != null && version != null) {
|
|
|
|
if (!update.newPathname) {
|
|
|
|
logger.warn(
|
|
|
|
{ projectId, docId, update },
|
|
|
|
'missing pathname in RedisManager.renameDoc'
|
2021-07-13 11:04:42 +00:00
|
|
|
)
|
2021-11-30 13:26:20 +00:00
|
|
|
metrics.inc('pathname', 1, {
|
|
|
|
path: 'RedisManager.renameDoc',
|
|
|
|
status: update.newPathname === '' ? 'zero-length' : 'undefined',
|
|
|
|
})
|
2021-07-13 11:04:42 +00:00
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.set(
|
|
|
|
keys.pathname({ doc_id: docId }),
|
|
|
|
update.newPathname,
|
|
|
|
callback
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
callback()
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
})
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
clearUnflushedTime(docId, callback) {
|
|
|
|
rclient.del(keys.unflushedTime({ doc_id: docId }), callback)
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2024-05-28 11:24:06 +00:00
|
|
|
updateCommentState(docId, commentId, resolved, callback) {
|
|
|
|
if (resolved) {
|
|
|
|
rclient.sadd(
|
|
|
|
keys.resolvedCommentIds({ doc_id: docId }),
|
|
|
|
commentId,
|
|
|
|
callback
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
rclient.srem(
|
|
|
|
keys.resolvedCommentIds({ doc_id: docId }),
|
|
|
|
commentId,
|
|
|
|
callback
|
|
|
|
)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
getDocIdsInProject(projectId, callback) {
|
|
|
|
rclient.smembers(keys.docsInProject({ project_id: projectId }), callback)
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
/**
|
|
|
|
* Get lastupdatedat timestamps for an array of docIds
|
|
|
|
*/
|
|
|
|
getDocTimestamps(docIds, callback) {
|
|
|
|
async.mapSeries(
|
|
|
|
docIds,
|
|
|
|
(docId, cb) => rclient.get(keys.lastUpdatedAt({ doc_id: docId }), cb),
|
2020-05-06 10:09:33 +00:00
|
|
|
callback
|
|
|
|
)
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
/**
|
|
|
|
* Store the project id in a sorted set ordered by time with a random offset
|
|
|
|
* to smooth out spikes
|
|
|
|
*/
|
|
|
|
queueFlushAndDeleteProject(projectId, callback) {
|
2020-05-06 10:09:33 +00:00
|
|
|
const SMOOTHING_OFFSET =
|
|
|
|
Settings.smoothingOffset > 0
|
|
|
|
? Math.round(Settings.smoothingOffset * Math.random())
|
|
|
|
: 0
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.zadd(
|
2020-05-06 10:09:33 +00:00
|
|
|
keys.flushAndDeleteQueue(),
|
|
|
|
Date.now() + SMOOTHING_OFFSET,
|
2021-11-30 13:26:20 +00:00
|
|
|
projectId,
|
2020-05-06 10:09:33 +00:00
|
|
|
callback
|
|
|
|
)
|
|
|
|
},
|
|
|
|
|
2021-11-30 13:26:20 +00:00
|
|
|
/**
|
|
|
|
* Find the oldest queued flush that is before the cutoff time
|
|
|
|
*/
|
2020-05-06 10:09:33 +00:00
|
|
|
getNextProjectToFlushAndDelete(cutoffTime, callback) {
|
2021-11-30 13:26:20 +00:00
|
|
|
rclient.zrangebyscore(
|
2020-05-06 10:09:33 +00:00
|
|
|
keys.flushAndDeleteQueue(),
|
|
|
|
0,
|
|
|
|
cutoffTime,
|
|
|
|
'WITHSCORES',
|
|
|
|
'LIMIT',
|
|
|
|
0,
|
|
|
|
1,
|
2021-11-30 13:26:20 +00:00
|
|
|
(err, reply) => {
|
|
|
|
if (err) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(err)
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
// return if no projects ready to be processed
|
|
|
|
if (!reply || reply.length === 0) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback()
|
2021-11-30 13:26:20 +00:00
|
|
|
}
|
2020-05-06 10:09:33 +00:00
|
|
|
// pop the oldest entry (get and remove in a multi)
|
|
|
|
const multi = rclient.multi()
|
|
|
|
// Poor man's version of ZPOPMIN, which is only available in Redis 5.
|
|
|
|
multi.zrange(keys.flushAndDeleteQueue(), 0, 0, 'WITHSCORES')
|
|
|
|
multi.zremrangebyrank(keys.flushAndDeleteQueue(), 0, 0)
|
|
|
|
multi.zcard(keys.flushAndDeleteQueue()) // the total length of the queue (for metrics)
|
2021-11-30 13:26:20 +00:00
|
|
|
multi.exec((err, reply) => {
|
|
|
|
if (err) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(err)
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
if (!reply || reply.length === 0) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback()
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
const [key, timestamp] = reply[0]
|
2020-05-06 10:09:33 +00:00
|
|
|
const queueLength = reply[2]
|
2021-11-30 13:26:20 +00:00
|
|
|
callback(null, key, timestamp, queueLength)
|
2020-05-06 10:09:33 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
)
|
|
|
|
},
|
|
|
|
|
2024-06-18 14:00:00 +00:00
|
|
|
setHistoryRangesSupportFlag(docId, historyRangesSupport, callback) {
|
|
|
|
if (historyRangesSupport) {
|
|
|
|
rclient.sadd(keys.historyRangesSupport(), docId, callback)
|
|
|
|
} else {
|
|
|
|
rclient.srem(keys.historyRangesSupport(), docId, callback)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
2024-07-02 13:55:35 +00:00
|
|
|
blockProject(projectId, callback) {
|
|
|
|
// Make sure that this MULTI operation only operates on project
|
|
|
|
// specific keys, i.e. keys that have the project id in curly braces.
|
|
|
|
// The curly braces identify a hash key for Redis and ensures that
|
|
|
|
// the MULTI's operations are all done on the same node in a
|
|
|
|
// cluster environment.
|
|
|
|
const multi = rclient.multi()
|
|
|
|
multi.setex(
|
|
|
|
keys.projectBlock({ project_id: projectId }),
|
|
|
|
PROJECT_BLOCK_TTL_SECS,
|
|
|
|
'1'
|
|
|
|
)
|
|
|
|
multi.scard(keys.docsInProject({ project_id: projectId }))
|
|
|
|
multi.exec((err, reply) => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const docsInProject = reply[1]
|
|
|
|
if (docsInProject > 0) {
|
|
|
|
// Too late to lock the project
|
|
|
|
rclient.del(keys.projectBlock({ project_id: projectId }), err => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
callback(null, false)
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
callback(null, true)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
|
|
|
unblockProject(projectId, callback) {
|
|
|
|
rclient.del(keys.projectBlock({ project_id: projectId }), (err, reply) => {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const wasBlocked = reply === 1
|
|
|
|
callback(null, wasBlocked)
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2020-05-06 10:09:33 +00:00
|
|
|
_serializeRanges(ranges, callback) {
|
|
|
|
let jsonRanges = JSON.stringify(ranges)
|
2021-11-30 13:26:20 +00:00
|
|
|
if (jsonRanges && jsonRanges.length > MAX_RANGES_SIZE) {
|
2020-05-06 10:09:33 +00:00
|
|
|
return callback(new Error('ranges are too large'))
|
|
|
|
}
|
|
|
|
if (jsonRanges === '{}') {
|
|
|
|
// Most doc will have empty ranges so don't fill redis with lots of '{}' keys
|
|
|
|
jsonRanges = null
|
|
|
|
}
|
2021-11-30 13:26:20 +00:00
|
|
|
callback(null, jsonRanges)
|
2020-05-06 10:09:33 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
_deserializeRanges(ranges) {
|
|
|
|
if (ranges == null || ranges === '') {
|
|
|
|
return {}
|
|
|
|
} else {
|
|
|
|
return JSON.parse(ranges)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
|
|
|
_computeHash(docLines) {
|
|
|
|
// use sha1 checksum of doclines to detect data corruption.
|
|
|
|
//
|
|
|
|
// note: must specify 'utf8' encoding explicitly, as the default is
|
|
|
|
// binary in node < v5
|
|
|
|
return crypto.createHash('sha1').update(docLines, 'utf8').digest('hex')
|
2021-07-13 11:04:42 +00:00
|
|
|
},
|
2020-05-06 10:09:33 +00:00
|
|
|
}
|
2024-01-23 10:45:06 +00:00
|
|
|
|
2024-01-30 15:35:54 +00:00
|
|
|
// Names given to the callback results of the methods that call back with
// more than one value; used to build the result of their promisified form.
const GET_DOC_RESULT_NAMES = [
  'lines',
  'version',
  'ranges',
  'pathname',
  'projectHistoryId',
  'unflushedTime',
  'lastUpdatedAt',
  'lastUpdatedBy',
  'historyRangesSupport',
  'resolvedCommentIds',
]
const NEXT_PROJECT_RESULT_NAMES = [
  'projectId',
  'flushTimestamp',
  'queueLength',
]

module.exports = RedisManager
// The synchronous helpers are excluded from promisification.
module.exports.promises = promisifyAll(RedisManager, {
  without: ['_deserializeRanges', '_computeHash'],
  multiResult: {
    getDoc: GET_DOC_RESULT_NAMES,
    getNextProjectToFlushAndDelete: NEXT_PROJECT_RESULT_NAMES,
  },
})
|