Merge pull request #5132 from overleaf/bg-fix-size-check

[docupdater] use more accurate doc size check

GitOrigin-RevId: f66d68a7f7fdc127cc31539abdcab65549823d02
This commit is contained in:
Brian Gough 2021-09-20 09:23:37 +01:00 committed by Copybot
parent 60db065c37
commit 865a973426
5 changed files with 194 additions and 15 deletions

View file

@ -8,6 +8,7 @@ const Settings = require('@overleaf/settings')
const Metrics = require('./Metrics')
const ProjectFlusher = require('./ProjectFlusher')
const DeleteQueueManager = require('./DeleteQueueManager')
const { getTotalSizeOfLines } = require('./Limits')
const async = require('async')
module.exports = {
@ -83,14 +84,6 @@ function peekDoc(req, res, next) {
})
}
function _getTotalSizeOfLines(lines) {
let size = 0
for (const line of lines) {
size += line.length + 1
}
return size
}
function getProjectDocsAndFlushIfOld(req, res, next) {
const projectId = req.params.project_id
const projectStateHash = req.query.state
@ -150,7 +143,7 @@ function setDoc(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
const { lines, source, user_id: userId, undoing } = req.body
const lineSize = _getTotalSizeOfLines(lines)
const lineSize = getTotalSizeOfLines(lines)
if (lineSize > Settings.max_doc_length) {
logger.log(
{ projectId, docId, source, lineSize, userId },

View file

@ -0,0 +1,31 @@
module.exports = {
// compute the total size of the document in chararacters, including newlines
getTotalSizeOfLines(lines) {
let size = 0
for (const line of lines) {
size += line.length + 1 // include the newline
}
return size
},
// check whether the total size of the document in characters exceeds the
// maxDocLength.
//
// The estimated size should be an upper bound on the true size, typically
// it will be the size of the JSON.stringified array of lines. If the
// estimated size is less than the maxDocLength then we know that the total
// size of lines will also be less than maxDocLength.
docIsTooLarge(estimatedSize, lines, maxDocLength) {
if (estimatedSize <= maxDocLength) {
return false // definitely under the limit, no need to calculate the total size
}
// calculate the total size, bailing out early if the size limit is reached
let size = 0
for (const line of lines) {
size += line.length + 1 // include the newline
if (size > maxDocLength) return true
}
// since we didn't hit the limit in the loop, the document is within the allowed length
return false
},
}

View file

@ -24,6 +24,7 @@ const Errors = require('./Errors')
const crypto = require('crypto')
const async = require('async')
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
const { docIsTooLarge } = require('./Limits')
// Sometimes Redis calls take an unexpectedly long time. We have to be
// quick with Redis calls because we're holding a lock that expires
@ -64,6 +65,7 @@ module.exports = RedisManager = {
timer.done()
return _callback(error)
}
const docLinesArray = docLines
docLines = JSON.stringify(docLines)
if (docLines.indexOf('\u0000') !== -1) {
const error = new Error('null bytes found in doc lines')
@ -72,8 +74,10 @@ module.exports = RedisManager = {
logger.error({ err: error, doc_id, docLines }, error.message)
return callback(error)
}
// Do a cheap size check on the serialized blob.
if (docLines.length > Settings.max_doc_length) {
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = docLines.length
if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) {
const docSize = docLines.length
const err = new Error('blocking doc insert into redis: doc is too large')
logger.error({ project_id, doc_id, err, docSize }, err.message)
@ -468,8 +472,10 @@ module.exports = RedisManager = {
logger.error({ err: error, doc_id, newDocLines }, error.message)
return callback(error)
}
// Do a cheap size check on the serialized blob.
if (newDocLines.length > Settings.max_doc_length) {
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = newDocLines.length
if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) {
const err = new Error('blocking doc update: doc is too large')
const docSize = newDocLines.length
logger.error({ project_id, doc_id, err, docSize }, err.message)

View file

@ -27,7 +27,7 @@ describe('SizeChecks', function () {
describe('when a doc is above the doc size limit already', function () {
beforeEach(function () {
this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 + 1)]
this.lines = ['x'.repeat(Settings.max_doc_length)] // including the extra newline, this will be over the limit
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
v: this.version,
@ -72,9 +72,74 @@ describe('SizeChecks', function () {
})
})
describe('when the stringified JSON is above the doc size limit but the doc character count is not', function () {
beforeEach(function () {
let charsRemaining = Settings.max_doc_length
this.lines = []
// Take the maximum allowed doc length and split it into N lines of 63 characters + a newline.
// The character count will be exactly max_doc_length
// The JSON stringified size will exceed max_doc_length, due to the JSON formatting of the array.
// This document should be allowed, because we use the character count as the limit, not the JSON size.
while (charsRemaining > 0) {
const charstoAdd = Math.min(charsRemaining - 1, 63) // allow for additional newline
this.lines.push('x'.repeat(charstoAdd))
charsRemaining -= charstoAdd + 1
}
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
v: this.version,
})
})
it('should be able to fetch the doc', function (done) {
DocUpdaterClient.getDoc(
this.project_id,
this.doc_id,
(error, res, doc) => {
if (error) return done(error)
expect(doc.lines).to.deep.equal(this.lines)
done()
}
)
})
describe('when trying to update', function () {
beforeEach(function (done) {
const update = {
doc: this.doc_id,
op: this.update.op,
v: this.version,
}
DocUpdaterClient.sendUpdate(
this.project_id,
this.doc_id,
update,
error => {
if (error != null) {
throw error
}
setTimeout(done, 200)
}
)
})
it('should not update the doc', function (done) {
DocUpdaterClient.getDoc(
this.project_id,
this.doc_id,
(error, res, doc) => {
if (error) return done(error)
expect(doc.lines).to.deep.equal(this.lines)
done()
}
)
})
})
})
describe('when a doc is just below the doc size limit', function () {
beforeEach(function () {
this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 - 1)]
this.lines = ['x'.repeat(Settings.max_doc_length - 1)] // character count is exactly max_doc_length after including the newline
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
v: this.version,

View file

@ -0,0 +1,84 @@
const { expect } = require('chai')
const modulePath = '../../../../app/js/Limits.js'
const SandboxedModule = require('sandboxed-module')
describe('Limits', function () {
beforeEach(function () {
return (this.Limits = SandboxedModule.require(modulePath))
})
describe('getTotalSizeOfLines', function () {
it('should compute the character count for a document with multiple lines', function () {
const count = this.Limits.getTotalSizeOfLines(['123', '4567'])
expect(count).to.equal(9)
})
it('should compute the character count for a document with a single line', function () {
const count = this.Limits.getTotalSizeOfLines(['123'])
expect(count).to.equal(4)
})
it('should compute the character count for an empty document', function () {
const count = this.Limits.getTotalSizeOfLines([])
expect(count).to.equal(0)
})
})
describe('docIsTooLarge', function () {
describe('when the estimated size is below the limit', function () {
it('should return false when the estimated size is below the limit', function () {
const result = this.Limits.docIsTooLarge(128, ['hello', 'world'], 1024)
expect(result).to.be.false
})
})
describe('when the estimated size is at the limit', function () {
it('should return false when the estimated size is at the limit', function () {
const result = this.Limits.docIsTooLarge(1024, ['hello', 'world'], 1024)
expect(result).to.be.false
})
})
describe('when the estimated size is above the limit', function () {
it('should return false when the actual character count is below the limit', function () {
const result = this.Limits.docIsTooLarge(2048, ['hello', 'world'], 1024)
expect(result).to.be.false
})
it('should return false when the actual character count is at the limit', function () {
const result = this.Limits.docIsTooLarge(2048, ['x'.repeat(1023)], 1024)
expect(result).to.be.false
})
it('should return true when the actual character count is above the limit by 1', function () {
const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(1024)], 1024)
expect(count).to.be.true
})
it('should return true when the actual character count is above the limit', function () {
const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(2000)], 1024)
expect(count).to.be.true
})
})
describe('when the document has many lines', function () {
it('should return false when the actual character count is below the limit ', function () {
const count = this.Limits.docIsTooLarge(
2048,
'1234567890'.repeat(100).split('0'),
1024
)
expect(count).to.be.false
})
it('should return true when the actual character count is above the limit', function () {
const count = this.Limits.docIsTooLarge(
2048,
'1234567890'.repeat(2000).split('0'),
1024
)
expect(count).to.be.true
})
})
})
})