mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-07 20:31:06 -05:00
Merge pull request #5132 from overleaf/bg-fix-size-check
[docupdater] use more accurate doc size check GitOrigin-RevId: f66d68a7f7fdc127cc31539abdcab65549823d02
This commit is contained in:
parent
60db065c37
commit
865a973426
5 changed files with 194 additions and 15 deletions
|
@ -8,6 +8,7 @@ const Settings = require('@overleaf/settings')
|
|||
const Metrics = require('./Metrics')
|
||||
const ProjectFlusher = require('./ProjectFlusher')
|
||||
const DeleteQueueManager = require('./DeleteQueueManager')
|
||||
const { getTotalSizeOfLines } = require('./Limits')
|
||||
const async = require('async')
|
||||
|
||||
module.exports = {
|
||||
|
@ -83,14 +84,6 @@ function peekDoc(req, res, next) {
|
|||
})
|
||||
}
|
||||
|
||||
function _getTotalSizeOfLines(lines) {
|
||||
let size = 0
|
||||
for (const line of lines) {
|
||||
size += line.length + 1
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
function getProjectDocsAndFlushIfOld(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const projectStateHash = req.query.state
|
||||
|
@ -150,7 +143,7 @@ function setDoc(req, res, next) {
|
|||
const docId = req.params.doc_id
|
||||
const projectId = req.params.project_id
|
||||
const { lines, source, user_id: userId, undoing } = req.body
|
||||
const lineSize = _getTotalSizeOfLines(lines)
|
||||
const lineSize = getTotalSizeOfLines(lines)
|
||||
if (lineSize > Settings.max_doc_length) {
|
||||
logger.log(
|
||||
{ projectId, docId, source, lineSize, userId },
|
||||
|
|
31
services/document-updater/app/js/Limits.js
Normal file
31
services/document-updater/app/js/Limits.js
Normal file
|
@ -0,0 +1,31 @@
|
|||
module.exports = {
|
||||
// compute the total size of the document in chararacters, including newlines
|
||||
getTotalSizeOfLines(lines) {
|
||||
let size = 0
|
||||
for (const line of lines) {
|
||||
size += line.length + 1 // include the newline
|
||||
}
|
||||
return size
|
||||
},
|
||||
|
||||
// check whether the total size of the document in characters exceeds the
|
||||
// maxDocLength.
|
||||
//
|
||||
// The estimated size should be an upper bound on the true size, typically
|
||||
// it will be the size of the JSON.stringified array of lines. If the
|
||||
// estimated size is less than the maxDocLength then we know that the total
|
||||
// size of lines will also be less than maxDocLength.
|
||||
docIsTooLarge(estimatedSize, lines, maxDocLength) {
|
||||
if (estimatedSize <= maxDocLength) {
|
||||
return false // definitely under the limit, no need to calculate the total size
|
||||
}
|
||||
// calculate the total size, bailing out early if the size limit is reached
|
||||
let size = 0
|
||||
for (const line of lines) {
|
||||
size += line.length + 1 // include the newline
|
||||
if (size > maxDocLength) return true
|
||||
}
|
||||
// since we didn't hit the limit in the loop, the document is within the allowed length
|
||||
return false
|
||||
},
|
||||
}
|
|
@ -24,6 +24,7 @@ const Errors = require('./Errors')
|
|||
const crypto = require('crypto')
|
||||
const async = require('async')
|
||||
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
|
||||
const { docIsTooLarge } = require('./Limits')
|
||||
|
||||
// Sometimes Redis calls take an unexpectedly long time. We have to be
|
||||
// quick with Redis calls because we're holding a lock that expires
|
||||
|
@ -64,6 +65,7 @@ module.exports = RedisManager = {
|
|||
timer.done()
|
||||
return _callback(error)
|
||||
}
|
||||
const docLinesArray = docLines
|
||||
docLines = JSON.stringify(docLines)
|
||||
if (docLines.indexOf('\u0000') !== -1) {
|
||||
const error = new Error('null bytes found in doc lines')
|
||||
|
@ -72,8 +74,10 @@ module.exports = RedisManager = {
|
|||
logger.error({ err: error, doc_id, docLines }, error.message)
|
||||
return callback(error)
|
||||
}
|
||||
// Do a cheap size check on the serialized blob.
|
||||
if (docLines.length > Settings.max_doc_length) {
|
||||
// Do an optimised size check on the docLines using the serialised
|
||||
// length as an upper bound
|
||||
const sizeBound = docLines.length
|
||||
if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) {
|
||||
const docSize = docLines.length
|
||||
const err = new Error('blocking doc insert into redis: doc is too large')
|
||||
logger.error({ project_id, doc_id, err, docSize }, err.message)
|
||||
|
@ -468,8 +472,10 @@ module.exports = RedisManager = {
|
|||
logger.error({ err: error, doc_id, newDocLines }, error.message)
|
||||
return callback(error)
|
||||
}
|
||||
// Do a cheap size check on the serialized blob.
|
||||
if (newDocLines.length > Settings.max_doc_length) {
|
||||
// Do an optimised size check on the docLines using the serialised
|
||||
// length as an upper bound
|
||||
const sizeBound = newDocLines.length
|
||||
if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) {
|
||||
const err = new Error('blocking doc update: doc is too large')
|
||||
const docSize = newDocLines.length
|
||||
logger.error({ project_id, doc_id, err, docSize }, err.message)
|
||||
|
|
|
@ -27,7 +27,7 @@ describe('SizeChecks', function () {
|
|||
|
||||
describe('when a doc is above the doc size limit already', function () {
|
||||
beforeEach(function () {
|
||||
this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 + 1)]
|
||||
this.lines = ['x'.repeat(Settings.max_doc_length)] // including the extra newline, this will be over the limit
|
||||
MockWebApi.insertDoc(this.project_id, this.doc_id, {
|
||||
lines: this.lines,
|
||||
v: this.version,
|
||||
|
@ -72,9 +72,74 @@ describe('SizeChecks', function () {
|
|||
})
|
||||
})
|
||||
|
||||
describe('when the stringified JSON is above the doc size limit but the doc character count is not', function () {
|
||||
beforeEach(function () {
|
||||
let charsRemaining = Settings.max_doc_length
|
||||
this.lines = []
|
||||
// Take the maximum allowed doc length and split it into N lines of 63 characters + a newline.
|
||||
// The character count will be exactly max_doc_length
|
||||
// The JSON stringified size will exceed max_doc_length, due to the JSON formatting of the array.
|
||||
// This document should be allowed, because we use the character count as the limit, not the JSON size.
|
||||
while (charsRemaining > 0) {
|
||||
const charstoAdd = Math.min(charsRemaining - 1, 63) // allow for additional newline
|
||||
this.lines.push('x'.repeat(charstoAdd))
|
||||
charsRemaining -= charstoAdd + 1
|
||||
}
|
||||
MockWebApi.insertDoc(this.project_id, this.doc_id, {
|
||||
lines: this.lines,
|
||||
v: this.version,
|
||||
})
|
||||
})
|
||||
|
||||
it('should be able to fetch the doc', function (done) {
|
||||
DocUpdaterClient.getDoc(
|
||||
this.project_id,
|
||||
this.doc_id,
|
||||
(error, res, doc) => {
|
||||
if (error) return done(error)
|
||||
expect(doc.lines).to.deep.equal(this.lines)
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
describe('when trying to update', function () {
|
||||
beforeEach(function (done) {
|
||||
const update = {
|
||||
doc: this.doc_id,
|
||||
op: this.update.op,
|
||||
v: this.version,
|
||||
}
|
||||
DocUpdaterClient.sendUpdate(
|
||||
this.project_id,
|
||||
this.doc_id,
|
||||
update,
|
||||
error => {
|
||||
if (error != null) {
|
||||
throw error
|
||||
}
|
||||
setTimeout(done, 200)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should not update the doc', function (done) {
|
||||
DocUpdaterClient.getDoc(
|
||||
this.project_id,
|
||||
this.doc_id,
|
||||
(error, res, doc) => {
|
||||
if (error) return done(error)
|
||||
expect(doc.lines).to.deep.equal(this.lines)
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('when a doc is just below the doc size limit', function () {
|
||||
beforeEach(function () {
|
||||
this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 - 1)]
|
||||
this.lines = ['x'.repeat(Settings.max_doc_length - 1)] // character count is exactly max_doc_length after including the newline
|
||||
MockWebApi.insertDoc(this.project_id, this.doc_id, {
|
||||
lines: this.lines,
|
||||
v: this.version,
|
||||
|
|
84
services/document-updater/test/unit/js/Limits/LimitsTests.js
Normal file
84
services/document-updater/test/unit/js/Limits/LimitsTests.js
Normal file
|
@ -0,0 +1,84 @@
|
|||
const { expect } = require('chai')
|
||||
const modulePath = '../../../../app/js/Limits.js'
|
||||
const SandboxedModule = require('sandboxed-module')
|
||||
|
||||
describe('Limits', function () {
|
||||
beforeEach(function () {
|
||||
return (this.Limits = SandboxedModule.require(modulePath))
|
||||
})
|
||||
|
||||
describe('getTotalSizeOfLines', function () {
|
||||
it('should compute the character count for a document with multiple lines', function () {
|
||||
const count = this.Limits.getTotalSizeOfLines(['123', '4567'])
|
||||
expect(count).to.equal(9)
|
||||
})
|
||||
|
||||
it('should compute the character count for a document with a single line', function () {
|
||||
const count = this.Limits.getTotalSizeOfLines(['123'])
|
||||
expect(count).to.equal(4)
|
||||
})
|
||||
|
||||
it('should compute the character count for an empty document', function () {
|
||||
const count = this.Limits.getTotalSizeOfLines([])
|
||||
expect(count).to.equal(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('docIsTooLarge', function () {
|
||||
describe('when the estimated size is below the limit', function () {
|
||||
it('should return false when the estimated size is below the limit', function () {
|
||||
const result = this.Limits.docIsTooLarge(128, ['hello', 'world'], 1024)
|
||||
expect(result).to.be.false
|
||||
})
|
||||
})
|
||||
|
||||
describe('when the estimated size is at the limit', function () {
|
||||
it('should return false when the estimated size is at the limit', function () {
|
||||
const result = this.Limits.docIsTooLarge(1024, ['hello', 'world'], 1024)
|
||||
expect(result).to.be.false
|
||||
})
|
||||
})
|
||||
|
||||
describe('when the estimated size is above the limit', function () {
|
||||
it('should return false when the actual character count is below the limit', function () {
|
||||
const result = this.Limits.docIsTooLarge(2048, ['hello', 'world'], 1024)
|
||||
expect(result).to.be.false
|
||||
})
|
||||
|
||||
it('should return false when the actual character count is at the limit', function () {
|
||||
const result = this.Limits.docIsTooLarge(2048, ['x'.repeat(1023)], 1024)
|
||||
expect(result).to.be.false
|
||||
})
|
||||
|
||||
it('should return true when the actual character count is above the limit by 1', function () {
|
||||
const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(1024)], 1024)
|
||||
expect(count).to.be.true
|
||||
})
|
||||
|
||||
it('should return true when the actual character count is above the limit', function () {
|
||||
const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(2000)], 1024)
|
||||
expect(count).to.be.true
|
||||
})
|
||||
})
|
||||
|
||||
describe('when the document has many lines', function () {
|
||||
it('should return false when the actual character count is below the limit ', function () {
|
||||
const count = this.Limits.docIsTooLarge(
|
||||
2048,
|
||||
'1234567890'.repeat(100).split('0'),
|
||||
1024
|
||||
)
|
||||
expect(count).to.be.false
|
||||
})
|
||||
|
||||
it('should return true when the actual character count is above the limit', function () {
|
||||
const count = this.Limits.docIsTooLarge(
|
||||
2048,
|
||||
'1234567890'.repeat(2000).split('0'),
|
||||
1024
|
||||
)
|
||||
expect(count).to.be.true
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
Loading…
Reference in a new issue