From e3d73d445668edc5d848cd492abf12aa55a59f85 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 25 Mar 2015 16:53:20 +0000 Subject: [PATCH 01/13] add gzip support for large documents to reduce load on redis --- .../app/coffee/RedisManager.coffee | 32 ++++++++----- .../app/coffee/ZipManager.coffee | 47 +++++++++++++++++++ .../config/settings.defaults.coffee | 3 ++ .../clearDocFromPendingUpdatesSetTests.coffee | 2 +- .../getDocsWithPendingUpdatesTests.coffee | 2 +- .../getPreviousDocOpsTests.coffee | 2 +- .../coffee/RedisManager/pushDocOpTests.coffee | 2 +- .../pushUncompressedHistoryOpTests.coffee | 2 +- 8 files changed, 75 insertions(+), 17 deletions(-) create mode 100644 services/document-updater/app/coffee/ZipManager.coffee diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee index 69d917815e..523d7057a9 100644 --- a/services/document-updater/app/coffee/RedisManager.coffee +++ b/services/document-updater/app/coffee/RedisManager.coffee @@ -6,6 +6,11 @@ _ = require('underscore') keys = require('./RedisKeyBuilder') logger = require('logger-sharelatex') metrics = require('./Metrics') +ZipManager = require('./ZipManager') + +redisOptions = _.clone(Settings.redis.web) +redisOptions.return_buffers = true +rclientBuffer = redis.createClient(redisOptions) # Make times easy to read minutes = 60 # seconds for Redis expire @@ -44,19 +49,21 @@ module.exports = RedisManager = getDoc : (doc_id, callback = (error, lines, version) ->)-> timer = new metrics.Timer("redis.get-doc") - multi = rclient.multi() + # use Buffer when retrieving data as it may be gzipped + multi = rclientBuffer.multi() linesKey = keys.docLines(doc_id:doc_id) multi.get linesKey multi.get keys.docVersion(doc_id:doc_id) multi.exec (error, result)-> timer.done() return callback(error) if error? - try - docLines = JSON.parse result[0] - catch e - return callback(e) - version = parseInt(result[1] or 0, 10) - callback null, docLines, version + ZipManager.uncompressIfNeeded doc_id, result, (error, result) -> + try + docLines = JSON.parse result[0] + catch e + return callback(e) + version = parseInt(result[1] or 0, 10) + callback null, docLines, version getDocVersion: (doc_id, callback = (error, version) ->) -> rclient.get keys.docVersion(doc_id: doc_id), (error, version) -> @@ -70,11 +77,12 @@ module.exports = RedisManager = callback null, len setDocument : (doc_id, docLines, version, callback = (error) ->)-> - multi = rclient.multi() - multi.set keys.docLines(doc_id:doc_id), JSON.stringify(docLines) - multi.set keys.docVersion(doc_id:doc_id), version - multi.incr keys.now("docsets") - multi.exec (error, replys) -> callback(error) + ZipManager.compressIfNeeded doc_id, JSON.stringify(docLines), (err, result) -> + multi = rclient.multi() + multi.set keys.docLines(doc_id:doc_id), result + multi.set keys.docVersion(doc_id:doc_id), version + multi.incr keys.now("docsets") + multi.exec (error, replys) -> callback(error) getPendingUpdatesForDoc : (doc_id, callback)-> multi = rclient.multi() diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee new file mode 100644 index 0000000000..3809d1c7da --- /dev/null +++ b/services/document-updater/app/coffee/ZipManager.coffee @@ -0,0 +1,47 @@ +Settings = require('settings-sharelatex') +logger = require('logger-sharelatex') +metrics = require('./Metrics') +zlib = require('zlib') + +ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled? 
+ZIP_MINSIZE = Settings.redis.zip?.minsize || 64*1024 + +module.exports = ZipManager = + uncompressIfNeeded: (doc_id, result, callback) -> + # result is an array of [text, version]. Each entry is a node + # Buffer object which we need to convert to strings on output + + # first make sure the version (result[1]) is returned as a string + if result?[1]?.toString? + result[1] = result[1].toString() + + # now uncompress the text (result[0]) if needed + buf = result?[0] + + # Check if we have a GZIP file + if buf? and buf[0] == 0x1F and buf[1] == 0x8B + zlib.gunzip buf, (err, newbuf) -> + if err? + logger.err doc_id:doc_id, err:err, "error uncompressing doc" + callback(err, null) + else + logger.log doc_id:doc_id, fromBytes: buf.length, toChars: newbuf.length, factor: buf.length/newbuf.length, "uncompressed successfully" + result[0] = newbuf.toString() + callback(null, result) + else + # if we don't have a GZIP file it's just a buffer of text, convert it back to a string + if buf?.toString? + result[0] = buf.toString() + callback(null, result) + + compressIfNeeded: (doc_id, text, callback) -> + if ZIP_WRITES_ENABLED && ZIP_MINSIZE > 0 and text.length > ZIP_MINSIZE + zlib.gzip text, (err, buf) -> + if err? + logger.err doc_id:doc_id, err:err, "error compressing doc" + callback(err, null) + else + logger.log doc_id:doc_id, fromChars: text.length, toBytes: buf.length, factor: buf.length/text.length , "compressed successfully" + callback(null, buf) + else + callback(null, text) diff --git a/services/document-updater/config/settings.defaults.coffee b/services/document-updater/config/settings.defaults.coffee index b4f12ed81c..37be211f6e 100755 --- a/services/document-updater/config/settings.defaults.coffee +++ b/services/document-updater/config/settings.defaults.coffee @@ -20,6 +20,9 @@ module.exports = port:"6379" host:"localhost" password:"" + zip: + minSize: 8*1024 + writesEnabled: true mongo: url: 'mongodb://127.0.0.1/sharelatex' diff --git a/services/document-updater/test/unit/coffee/RedisManager/clearDocFromPendingUpdatesSetTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/clearDocFromPendingUpdatesSetTests.coffee index 81eb0bfefe..86ab837a2f 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/clearDocFromPendingUpdatesSetTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/clearDocFromPendingUpdatesSetTests.coffee @@ -11,7 +11,7 @@ describe "RedisManager.clearDocFromPendingUpdatesSet", -> @callback = sinon.stub() @RedisManager = SandboxedModule.require modulePath, requires: "redis-sharelatex" : createClient: () => - @rclient = auth:-> + @rclient ?= auth:-> # only assign one rclient "logger-sharelatex": {} @rclient.srem = sinon.stub().callsArg(2) diff --git a/services/document-updater/test/unit/coffee/RedisManager/getDocsWithPendingUpdatesTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/getDocsWithPendingUpdatesTests.coffee index 5bbb93a723..2f54ba171e 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/getDocsWithPendingUpdatesTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/getDocsWithPendingUpdatesTests.coffee @@ -9,7 +9,7 @@ describe "RedisManager.getDocsWithPendingUpdates", -> @callback = sinon.stub() @RedisManager = SandboxedModule.require modulePath, requires: "redis-sharelatex" : createClient: () => - @rclient = auth:-> + @rclient ?= auth:-> "logger-sharelatex": {} @docs = [{ diff --git 
a/services/document-updater/test/unit/coffee/RedisManager/getPreviousDocOpsTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/getPreviousDocOpsTests.coffee index 6cd4980fd8..4a6d42c1ab 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/getPreviousDocOpsTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/getPreviousDocOpsTests.coffee @@ -9,7 +9,7 @@ describe "RedisManager.getPreviousDocOpsTests", -> @callback = sinon.stub() @RedisManager = SandboxedModule.require modulePath, requires: "redis-sharelatex" : createClient: () => - @rclient = + @rclient ?= auth: -> multi: => @rclient "logger-sharelatex": @logger = { error: sinon.stub(), log: sinon.stub() } diff --git a/services/document-updater/test/unit/coffee/RedisManager/pushDocOpTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/pushDocOpTests.coffee index 71a36bb4f3..a90b20bced 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/pushDocOpTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/pushDocOpTests.coffee @@ -8,7 +8,7 @@ describe "RedisManager.pushDocOp", -> beforeEach -> @RedisManager = SandboxedModule.require modulePath, requires: "redis-sharelatex": createClient: () => - @rclient = + @rclient ?= auth: () -> multi: () => @rclient "logger-sharelatex": @logger = {log: sinon.stub()} diff --git a/services/document-updater/test/unit/coffee/RedisManager/pushUncompressedHistoryOpTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/pushUncompressedHistoryOpTests.coffee index 621a3b1a3b..82b28a25d2 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/pushUncompressedHistoryOpTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/pushUncompressedHistoryOpTests.coffee @@ -8,7 +8,7 @@ describe "RedisManager.pushUncompressedHistoryOp", -> beforeEach -> @RedisManager = SandboxedModule.require modulePath, requires: "redis-sharelatex": createClient: () => - @rclient = + @rclient ?= auth: () -> multi: () => @rclient "logger-sharelatex": @logger = {log: sinon.stub()} From 6bffa4d9e0644bcaa93a795e35f796a79608200f Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 25 Mar 2015 16:54:36 +0000 Subject: [PATCH 02/13] don't log docLines when document removed from redis they can now be binary gzipped data which messes up the logs --- services/document-updater/app/coffee/RedisManager.coffee | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee index 523d7057a9..5ca4e1c174 100644 --- a/services/document-updater/app/coffee/RedisManager.coffee +++ b/services/document-updater/app/coffee/RedisManager.coffee @@ -32,7 +32,6 @@ module.exports = RedisManager = removeDocFromMemory : (project_id, doc_id, callback)-> logger.log project_id:project_id, doc_id:doc_id, "removing doc from redis" multi = rclient.multi() - multi.get keys.docLines(doc_id:doc_id) multi.del keys.docLines(doc_id:doc_id) multi.del keys.projectKey(doc_id:doc_id) multi.del keys.docVersion(doc_id:doc_id) @@ -43,8 +42,7 @@ module.exports = RedisManager = logger.err project_id:project_id, doc_id:doc_id, err:err, "error removing doc from redis" callback(err, null) else - docLines = replys[0] - logger.log project_id:project_id, doc_id:doc_id, docLines:docLines, "removed doc from redis" + logger.log project_id:project_id, doc_id:doc_id, "removed doc from redis" callback() getDoc : (doc_id, callback = 
(error, lines, version) ->)-> From 27d466aa85e6a7ba13792490afe9910586438b18 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Mar 2015 13:17:38 +0000 Subject: [PATCH 03/13] added acceptance test for redis compression --- .../coffee/ApplyingUpdatesToADocTests.coffee | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/services/document-updater/test/acceptance/coffee/ApplyingUpdatesToADocTests.coffee b/services/document-updater/test/acceptance/coffee/ApplyingUpdatesToADocTests.coffee index 1e9c2e2689..f213458168 100644 --- a/services/document-updater/test/acceptance/coffee/ApplyingUpdatesToADocTests.coffee +++ b/services/document-updater/test/acceptance/coffee/ApplyingUpdatesToADocTests.coffee @@ -238,3 +238,153 @@ describe "Applying updates to a doc", -> doc.lines.should.deep.equal @result done() + + + +describe "Applying updates to a large doc (uses compression)", -> + MIN_SIZE = 500000 + before -> + @lines = ["one", "two", "three"] + while @lines.join('').length < MIN_SIZE + @lines.push "this is a repeated long line which will create a large document which must be compressed #{@lines.length}" + @version = 42 + @update = + doc: @doc_id + op: [{ + i: "one and a half\n" + p: 4 + }] + v: @version + @result = @lines.slice() + @result.splice 1, 0, "one and a half" + + describe "when the document is not loaded", -> + before (done) -> + [@project_id, @doc_id] = [DocUpdaterClient.randomId(), DocUpdaterClient.randomId()] + sinon.spy MockWebApi, "getDocument" + + MockWebApi.insertDoc @project_id, @doc_id, lines: @lines + db.docOps.insert { + doc_id: ObjectId(@doc_id) + version: @version + }, (error) => + throw error if error? + DocUpdaterClient.sendUpdate @project_id, @doc_id, @update, (error) -> + throw error if error? + setTimeout done, 200 + + after -> + MockWebApi.getDocument.restore() + + it "should load the document from the web API", -> + MockWebApi.getDocument + .calledWith(@project_id, @doc_id) + .should.equal true + + it "should update the doc", (done) -> + DocUpdaterClient.getDoc @project_id, @doc_id, (error, res, doc) => + doc.lines.should.deep.equal @result + done() + + it "should push the applied updates to the track changes api", (done) -> + rclient.lrange "UncompressedHistoryOps:#{@doc_id}", 0, -1, (error, updates) => + throw error if error? + JSON.parse(updates[0]).op.should.deep.equal @update.op + rclient.sismember "DocsWithHistoryOps:#{@project_id}", @doc_id, (error, result) => + throw error if error? + result.should.equal 1 + done() + + + describe "when the document is loaded", -> + before (done) -> + [@project_id, @doc_id] = [DocUpdaterClient.randomId(), DocUpdaterClient.randomId()] + + MockWebApi.insertDoc @project_id, @doc_id, lines: @lines + db.docOps.insert doc_id: ObjectId(@doc_id), version: @version, (error) => + throw error if error? + DocUpdaterClient.preloadDoc @project_id, @doc_id, (error) => + throw error if error? + sinon.spy MockWebApi, "getDocument" + DocUpdaterClient.sendUpdate @project_id, @doc_id, @update, (error) -> + throw error if error? 
+ setTimeout done, 200 + + after -> + MockWebApi.getDocument.restore() + + it "should not need to call the web api", -> + MockWebApi.getDocument.called.should.equal false + + it "should update the doc", (done) -> + DocUpdaterClient.getDoc @project_id, @doc_id, (error, res, doc) => + doc.lines.should.deep.equal @result + done() + + it "should push the applied updates to the track changes api", (done) -> + rclient.lrange "UncompressedHistoryOps:#{@doc_id}", 0, -1, (error, updates) => + JSON.parse(updates[0]).op.should.deep.equal @update.op + rclient.sismember "DocsWithHistoryOps:#{@project_id}", @doc_id, (error, result) => + result.should.equal 1 + done() + + describe "with a broken update", -> + before (done) -> + [@project_id, @doc_id] = [DocUpdaterClient.randomId(), DocUpdaterClient.randomId()] + MockWebApi.insertDoc @project_id, @doc_id, lines: @lines + db.docOps.insert doc_id: ObjectId(@doc_id), version: @version, (error) => + throw error if error? + DocUpdaterClient.sendUpdate @project_id, @doc_id, @undefined, (error) -> + throw error if error? + setTimeout done, 200 + + it "should not update the doc", (done) -> + DocUpdaterClient.getDoc @project_id, @doc_id, (error, res, doc) => + doc.lines.should.deep.equal @lines + done() + + describe "with enough updates to flush to the track changes api", -> + before (done) -> + [@project_id, @doc_id] = [DocUpdaterClient.randomId(), DocUpdaterClient.randomId()] + updates = [] + for v in [0..99] # Should flush after 50 ops + updates.push + doc_id: @doc_id, + op: [i: v.toString(), p: 0] + v: v + + sinon.spy MockTrackChangesApi, "flushDoc" + + MockWebApi.insertDoc @project_id, @doc_id, lines: @lines + db.docOps.insert doc_id: ObjectId(@doc_id), version: 0, (error) => + throw error if error? + DocUpdaterClient.sendUpdates @project_id, @doc_id, updates, (error) => + throw error if error? + setTimeout done, 200 + + after -> + MockTrackChangesApi.flushDoc.restore() + + it "should flush the doc twice", -> + MockTrackChangesApi.flushDoc.calledTwice.should.equal true + + describe "when there is no version in Mongo", -> + before (done) -> + [@project_id, @doc_id] = [DocUpdaterClient.randomId(), DocUpdaterClient.randomId()] + MockWebApi.insertDoc @project_id, @doc_id, { + lines: @lines + } + + update = + doc: @doc_id + op: @update.op + v: 0 + DocUpdaterClient.sendUpdate @project_id, @doc_id, update, (error) -> + throw error if error? 
+ setTimeout done, 200 + + it "should update the doc (using version = 0)", (done) -> + DocUpdaterClient.getDoc @project_id, @doc_id, (error, res, doc) => + doc.lines.should.deep.equal @result + done() + From 66fa170ac83ad4a43f8b7feba6454011ada45713 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Mar 2015 15:32:13 +0000 Subject: [PATCH 04/13] disable compression by default --- services/document-updater/app/coffee/RedisManager.coffee | 2 +- services/document-updater/config/settings.defaults.coffee | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee index 5ca4e1c174..b08687a170 100644 --- a/services/document-updater/app/coffee/RedisManager.coffee +++ b/services/document-updater/app/coffee/RedisManager.coffee @@ -18,7 +18,7 @@ minutes = 60 # seconds for Redis expire module.exports = RedisManager = putDocInMemory : (project_id, doc_id, docLines, version, callback)-> timer = new metrics.Timer("redis.put-doc") - logger.log project_id:project_id, doc_id:doc_id, docLines:docLines, version: version, "putting doc in redis" + logger.log project_id:project_id, doc_id:doc_id, version: version, "putting doc in redis" multi = rclient.multi() multi.set keys.docLines(doc_id:doc_id), JSON.stringify(docLines) multi.set keys.projectKey({doc_id:doc_id}), project_id diff --git a/services/document-updater/config/settings.defaults.coffee b/services/document-updater/config/settings.defaults.coffee index 37be211f6e..babdaafdc3 100755 --- a/services/document-updater/config/settings.defaults.coffee +++ b/services/document-updater/config/settings.defaults.coffee @@ -21,8 +21,8 @@ module.exports = host:"localhost" password:"" zip: - minSize: 8*1024 - writesEnabled: true + minSize: 10*1024 + writesEnabled: false mongo: url: 'mongodb://127.0.0.1/sharelatex' From 03564b21387fe3c76b2d05764b16a6795065fff4 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Mar 2015 17:00:30 +0000 Subject: [PATCH 05/13] fix variable zip.minsize to match config name zip.minSize --- services/document-updater/app/coffee/ZipManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee index 3809d1c7da..2484a686ac 100644 --- a/services/document-updater/app/coffee/ZipManager.coffee +++ b/services/document-updater/app/coffee/ZipManager.coffee @@ -4,7 +4,7 @@ metrics = require('./Metrics') zlib = require('zlib') ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled? 
-ZIP_MINSIZE = Settings.redis.zip?.minsize || 64*1024 +ZIP_MINSIZE = Settings.redis.zip?.minSize || 64*1024 module.exports = ZipManager = uncompressIfNeeded: (doc_id, result, callback) -> From 86505047a3786fd23c4d4af64edc0f3362318562 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Fri, 27 Mar 2015 17:04:38 +0000 Subject: [PATCH 06/13] added unit test (work in progress) --- .../test/unit/coffee/Compression.coffee | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 services/document-updater/test/unit/coffee/Compression.coffee diff --git a/services/document-updater/test/unit/coffee/Compression.coffee b/services/document-updater/test/unit/coffee/Compression.coffee new file mode 100644 index 0000000000..bba1dd5c3e --- /dev/null +++ b/services/document-updater/test/unit/coffee/Compression.coffee @@ -0,0 +1,100 @@ +sinon = require('sinon') +chai = require('chai') +should = chai.should() +modulePath = "../../../app/js/RedisManager" +SandboxedModule = require('sandboxed-module') + +describe "RedisManager.setDocument and getDocument", -> + beforeEach -> + @zip_opts = + writesEnabled: true + minSize: 1000 + @doc_id = "document-id" + @version = 123 + @RedisManager = SandboxedModule.require modulePath, requires: + "settings-sharelatex" : + redis: + web: + host: 'none' + port: 'none' + zip: @zip_opts + "redis-sharelatex" : createClient: () => + @rclient ?= + auth:-> # only assign one rclient + multi: () => @rclient + set: (key, value) => @rclient.store[key] = value + get: (key) => @rclient.results.push @rclient.store[key] + incr: (key) => @rclient.store[key]++ + exec: (callback) => + callback.apply(null, [null, @rclient.results]) + @rclient.results = [] + store: {} + results: [] + "logger-sharelatex": {} + + @RedisManager.setDocument(@doc_id, @docLines, @version, @callback) + + describe "for a small document (uncompressed)", -> + before -> + @docLines = ["hello", "world"] + @callback = sinon.stub() + + it "should set the document", -> + @rclient.store['doclines:document-id'] + .should.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, version) => + @docLines.should.eql lines + done() + + describe "for a large document (with compression enabled)", -> + before -> + @zip_opts = + writesEnabled: true + minSize: 1000 + @docLines = [] + while @docLines.join('').length <= @zip_opts.minSize + @docLines.push "this is a long line in a long document" + @callback = sinon.stub() + + it "should set the document as a gzipped blob", -> + @rclient.store['doclines:document-id'] + .should.not.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the uncompressed document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, version) => + @docLines.should.eql lines + done() + + describe "for a large document (with compression disabled)", -> + before -> + @zip_opts = + writesEnabled: false + minSize: 1000 + @docLines = [] + while @docLines.join('').length <= @zip_opts.minSize + @docLines.push "this is a long line in a long document" + @callback = sinon.stub() + + it "should set the document", -> + @rclient.store['doclines:document-id'] + .should.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, 
version) => + @docLines.should.eql lines + done() + + + + From 8e8ee5b3dad873e3466553c8ae8954fa78c1e4d9 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 30 Mar 2015 16:41:23 +0100 Subject: [PATCH 07/13] fix tests --- .../test/unit/coffee/Compression.coffee | 100 ----------- .../unit/coffee/ZipManager/ZipManager.coffee | 158 ++++++++++++++++++ 2 files changed, 158 insertions(+), 100 deletions(-) delete mode 100644 services/document-updater/test/unit/coffee/Compression.coffee create mode 100644 services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee diff --git a/services/document-updater/test/unit/coffee/Compression.coffee b/services/document-updater/test/unit/coffee/Compression.coffee deleted file mode 100644 index bba1dd5c3e..0000000000 --- a/services/document-updater/test/unit/coffee/Compression.coffee +++ /dev/null @@ -1,100 +0,0 @@ -sinon = require('sinon') -chai = require('chai') -should = chai.should() -modulePath = "../../../app/js/RedisManager" -SandboxedModule = require('sandboxed-module') - -describe "RedisManager.setDocument and getDocument", -> - beforeEach -> - @zip_opts = - writesEnabled: true - minSize: 1000 - @doc_id = "document-id" - @version = 123 - @RedisManager = SandboxedModule.require modulePath, requires: - "settings-sharelatex" : - redis: - web: - host: 'none' - port: 'none' - zip: @zip_opts - "redis-sharelatex" : createClient: () => - @rclient ?= - auth:-> # only assign one rclient - multi: () => @rclient - set: (key, value) => @rclient.store[key] = value - get: (key) => @rclient.results.push @rclient.store[key] - incr: (key) => @rclient.store[key]++ - exec: (callback) => - callback.apply(null, [null, @rclient.results]) - @rclient.results = [] - store: {} - results: [] - "logger-sharelatex": {} - - @RedisManager.setDocument(@doc_id, @docLines, @version, @callback) - - describe "for a small document (uncompressed)", -> - before -> - @docLines = ["hello", "world"] - @callback = sinon.stub() - - it "should set the document", -> - @rclient.store['doclines:document-id'] - .should.equal JSON.stringify(@docLines) - - it "should return the callback", -> - @callback.called.should.equal true - - it "should get the document back again", (done) -> - @RedisManager.getDoc @doc_id, (err, lines, version) => - @docLines.should.eql lines - done() - - describe "for a large document (with compression enabled)", -> - before -> - @zip_opts = - writesEnabled: true - minSize: 1000 - @docLines = [] - while @docLines.join('').length <= @zip_opts.minSize - @docLines.push "this is a long line in a long document" - @callback = sinon.stub() - - it "should set the document as a gzipped blob", -> - @rclient.store['doclines:document-id'] - .should.not.equal JSON.stringify(@docLines) - - it "should return the callback", -> - @callback.called.should.equal true - - it "should get the uncompressed document back again", (done) -> - @RedisManager.getDoc @doc_id, (err, lines, version) => - @docLines.should.eql lines - done() - - describe "for a large document (with compression disabled)", -> - before -> - @zip_opts = - writesEnabled: false - minSize: 1000 - @docLines = [] - while @docLines.join('').length <= @zip_opts.minSize - @docLines.push "this is a long line in a long document" - @callback = sinon.stub() - - it "should set the document", -> - @rclient.store['doclines:document-id'] - .should.equal JSON.stringify(@docLines) - - it "should return the callback", -> - @callback.called.should.equal true - - it "should get the document back again", (done) -> - @RedisManager.getDoc 
@doc_id, (err, lines, version) => - @docLines.should.eql lines - done() - - - - diff --git a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee new file mode 100644 index 0000000000..11d1839994 --- /dev/null +++ b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee @@ -0,0 +1,158 @@ +sinon = require('sinon') +chai = require('chai') +should = chai.should() +zipModulePath = "../../../../app/js/ZipManager" +redisModulePath = "../../../../app/js/RedisManager" +SandboxedModule = require('sandboxed-module') + +MIN_SIZE = 9999 + +describe "ZipManager with RedisManager", -> + describe "for a small document (uncompressed)", -> + rclient = null + beforeEach (done) -> + @ZipManager = SandboxedModule.require zipModulePath, requires: + 'settings-sharelatex': redis: + web: + host: 'none' + port: 'none' + zip: + writesEnabled: true + minSize: MIN_SIZE + @RedisManager = SandboxedModule.require redisModulePath, requires: + "./ZipManager" : @ZipManager + "redis-sharelatex" : createClient: () => + rclient ?= + auth:-> # only assign one rclient + multi: () => + console.log 'returning rclient', rclient + rclient + set: (key, value) => rclient.store[key] = value + get: (key) => + console.log 'GET', key + rclient.results.push rclient.store[key] + incr: (key) => rclient.store[key]++ + exec: (callback) => + callback.apply(null, [null, rclient.results]) + rclient.results = [] + store: {} + results: [] + "logger-sharelatex": {} + @doc_id = "document-id" + @version = 123 + + @docLines = ["hello", "world"] + @callback = sinon.stub() + + @RedisManager.setDocument @doc_id, @docLines, @version, () => + @callback() + done() + + it "should set the document", -> + rclient.store['doclines:document-id'] + .should.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, version) => + @docLines.should.eql lines + done() + + describe "for a large document (with compression enabled)", -> + rclient = null + beforeEach (done) -> + @ZipManager = SandboxedModule.require zipModulePath, requires: + 'settings-sharelatex': redis: + web: + host: 'none' + port: 'none' + zip: + writesEnabled: true + minSize: MIN_SIZE + @RedisManager = SandboxedModule.require redisModulePath, requires: + "./ZipManager" : @ZipManager + "redis-sharelatex" : createClient: () => + rclient ?= + auth:-> # only assign one rclient + multi: () => rclient + set: (key, value) => rclient.store[key] = value + get: (key) => rclient.results.push rclient.store[key] + incr: (key) => rclient.store[key]++ + exec: (callback) => + callback.apply(null, [null, rclient.results]) + rclient.results = [] + store: {} + results: [] + "logger-sharelatex": {} + @doc_id = "document-id" + @version = 123 + + @docLines = [] + while @docLines.join('').length <= MIN_SIZE + @docLines.push "this is a long line in a long document" + @callback = sinon.stub() + @RedisManager.setDocument @doc_id, @docLines, @version, () => + @callback() + done() + + it "should set the document as a gzipped blob", -> + rclient.store['doclines:document-id'] + .should.not.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the uncompressed document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, version) => + @docLines.should.eql lines + done() + + describe "for a 
large document (with compression disabled)", -> + rclient = null + beforeEach (done) -> + @ZipManager = SandboxedModule.require zipModulePath, requires: + 'settings-sharelatex': redis: + web: + host: 'none' + port: 'none' + zip: + writesEnabled: false + minSize: MIN_SIZE + @RedisManager = SandboxedModule.require redisModulePath, requires: + "./ZipManager" : @ZipManager + "redis-sharelatex" : createClient: () => + rclient ?= + auth:-> # only assign one rclient + multi: () => rclient + set: (key, value) => rclient.store[key] = value + get: (key) => rclient.results.push rclient.store[key] + incr: (key) => rclient.store[key]++ + exec: (callback) => + callback.apply(null, [null, rclient.results]) + rclient.results = [] + store: {} + results: [] + "logger-sharelatex": {} + @doc_id = "document-id" + @version = 123 + @docLines = [] + while @docLines.join('').length <= MIN_SIZE + @docLines.push "this is a long line in a long document" + @callback = sinon.stub() + @RedisManager.setDocument @doc_id, @docLines, @version, () => + @callback() + done() + + it "should set the document", -> + rclient.store['doclines:document-id'] + .should.equal JSON.stringify(@docLines) + + it "should return the callback", -> + @callback.called.should.equal true + + it "should get the document back again", (done) -> + @RedisManager.getDoc @doc_id, (err, lines, version) => + @docLines.should.eql lines + done() From c8c12e8b410f257e3a1367d76105bac25586de34 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 30 Mar 2015 16:41:45 +0100 Subject: [PATCH 08/13] fix error in ZipManager writesEnabled setting --- services/document-updater/app/coffee/ZipManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee index 2484a686ac..7009362a7e 100644 --- a/services/document-updater/app/coffee/ZipManager.coffee +++ b/services/document-updater/app/coffee/ZipManager.coffee @@ -3,7 +3,7 @@ logger = require('logger-sharelatex') metrics = require('./Metrics') zlib = require('zlib') -ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled? 
+ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled
 ZIP_MINSIZE = Settings.redis.zip?.minSize || 64*1024
 
 module.exports = ZipManager =

From 495af5d568fa34d6b1c9f12780fbf63b3d16a6b7 Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Mon, 30 Mar 2015 16:56:07 +0100
Subject: [PATCH 09/13] remove console.logs from tests

---
 .../test/unit/coffee/ZipManager/ZipManager.coffee | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
index 11d1839994..c491c29587 100644
--- a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
+++ b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
@@ -24,13 +24,9 @@ describe "ZipManager with RedisManager", ->
       "redis-sharelatex" : createClient: () =>
         rclient ?=
           auth:-> # only assign one rclient
-          multi: () =>
-            console.log 'returning rclient', rclient
-            rclient
+          multi: () => rclient
           set: (key, value) => rclient.store[key] = value
-          get: (key) =>
-            console.log 'GET', key
-            rclient.results.push rclient.store[key]
+          get: (key) => rclient.results.push rclient.store[key]
           incr: (key) => rclient.store[key]++
           exec: (callback) =>
             callback.apply(null, [null, rclient.results])

From e61beed92f8175c857c6d5068d02a178633a87c5 Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Mon, 30 Mar 2015 16:58:00 +0100
Subject: [PATCH 10/13] suppress logging in ZipManager tests

---
 .../test/unit/coffee/ZipManager/ZipManager.coffee | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
index c491c29587..0ae53ebc19 100644
--- a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
+++ b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
@@ -12,6 +12,7 @@ describe "ZipManager with RedisManager", ->
     rclient = null
     beforeEach (done) ->
       @ZipManager = SandboxedModule.require zipModulePath, requires:
+        "logger-sharelatex": log:->
         'settings-sharelatex': redis:
           web:
             host: 'none'
@@ -60,6 +61,7 @@ describe "ZipManager with RedisManager", ->
     rclient = null
     beforeEach (done) ->
       @ZipManager = SandboxedModule.require zipModulePath, requires:
+        "logger-sharelatex": log:->
         'settings-sharelatex': redis:
           web:
             host: 'none'
@@ -109,6 +111,7 @@ describe "ZipManager with RedisManager", ->
     rclient = null
     beforeEach (done) ->
      @ZipManager = SandboxedModule.require zipModulePath, requires:
+        "logger-sharelatex": log:->
         'settings-sharelatex': redis:
           web:
             host: 'none'

From 9bb08d7ba50c465f9075ebb91e57ca1b8bf8832b Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Tue, 31 Mar 2015 10:07:11 +0100
Subject: [PATCH 11/13] add ZipManager comments

---
 .../app/coffee/ZipManager.coffee | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee
index 7009362a7e..6011fee659 100644
--- a/services/document-updater/app/coffee/ZipManager.coffee
+++ b/services/document-updater/app/coffee/ZipManager.coffee
@@ -3,6 +3,31 @@ logger = require('logger-sharelatex')
 metrics = require('./Metrics')
 zlib = require('zlib')
 
+# Compress and uncompress data sent to Redis using the node 'zlib'
+# module, to reduce load on Redis.
+#
+# Measurements show that most of the load on Redis comes from very
+# large documents. We can shift some of that CPU load from redis to
+# the docupdaters (which are scalable) by compressing the data in the
+# docupdater first.
+#
+# To avoid overloading the docupdater clients we impose a minimum size
+# on data we will compress, so we only catch the large ones.
+#
+# The optimum size for the cutoff is about 10k; below this we do
+# more work but don't really gain any extra reduction in Redis CPU.
+#
+# |--------------------+-----------+--------------------------|
+# | Compression cutoff | Redis CPU | Extra doc updater CPU(*) |
+# |--------------------+-----------+--------------------------|
+# | N/A                | 100%      | 0%                       |
+# | 100k               | 80%       | 10%                      |
+# | 10k                | 50%       | 30%                      |
+# |--------------------+-----------+--------------------------|
+#
+# (*) percentage of a single core, because node zlib runs in multiple
+# threads.
+
 ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled
 ZIP_MINSIZE = Settings.redis.zip?.minSize || 64*1024
 
@@ -18,7 +43,7 @@ module.exports = ZipManager =
     # now uncompress the text (result[0]) if needed
     buf = result?[0]
 
-    # Check if we have a GZIP file
+    # Check if we have a GZIP file (magic numbers in header)
     if buf? and buf[0] == 0x1F and buf[1] == 0x8B
       zlib.gunzip buf, (err, newbuf) ->
         if err?
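
The cutoff figures quoted in the comment above are easy to reproduce independently. The following standalone sketch (not part of the patch series; the measureCompression helper is invented for illustration, echoing the fromChars/toBytes/factor fields that ZipManager logs) gzips a synthetic document of a given size and reports the compression factor:

zlib = require('zlib')

# Build a synthetic document of at least `size` characters, gzip it,
# and report the same size/factor figures that ZipManager logs.
measureCompression = (size, callback) ->
  lines = []
  lines.push "this is a repeated long line #{lines.length}" while lines.join('').length < size
  text = JSON.stringify(lines)
  zlib.gzip text, (err, buf) ->
    return callback(err) if err?
    callback null, {fromChars: text.length, toBytes: buf.length, factor: buf.length / text.length}

# e.g. check the behaviour around the 10k cutoff discussed above
measureCompression 10 * 1024, (err, stats) ->
  console.log stats unless err?
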
From 36f60d5bce9bac735b2f6d53f5ace582b2fd0f97 Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Tue, 31 Mar 2015 10:07:39 +0100
Subject: [PATCH 12/13] enforce minimum size of 1k for compression

---
 services/document-updater/app/coffee/ZipManager.coffee | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee
index 6011fee659..18482a5c69 100644
--- a/services/document-updater/app/coffee/ZipManager.coffee
+++ b/services/document-updater/app/coffee/ZipManager.coffee
@@ -60,7 +60,8 @@ module.exports = ZipManager =
       callback(null, result)
 
   compressIfNeeded: (doc_id, text, callback) ->
-    if ZIP_WRITES_ENABLED && ZIP_MINSIZE > 0 and text.length > ZIP_MINSIZE
+    if ZIP_WRITES_ENABLED and ZIP_MINSIZE > 1024 and text.length > ZIP_MINSIZE
+      # N.B. skip files of 1k or less, because gzip increases the size
       zlib.gzip text, (err, buf) ->
         if err?
           logger.err doc_id:doc_id, err:err, "error compressing doc"
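
The 1k floor introduced by this patch reflects a fixed cost in the gzip format: every gzip stream carries roughly 20 bytes of header and trailer on top of the deflate data, so very small inputs come back larger than they went in. A standalone sketch (not part of the patch series) demonstrating the effect:

zlib = require('zlib')

# Tiny inputs grow under gzip (fixed header/trailer overhead), while
# larger repetitive inputs shrink; this is why compressIfNeeded now
# skips compression unless the configured cutoff exceeds 1k.
for sample in ["x", "hello world", new Array(200).join("ab")]
  do (sample) ->
    zlib.gzip sample, (err, buf) ->
      console.log "in: #{sample.length} chars, out: #{buf.length} bytes" unless err?
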
From 6cdf5615fc0c36df96b2df47eff7777be5eaa037 Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Tue, 31 Mar 2015 10:24:09 +0100
Subject: [PATCH 13/13] remove old unused functions

---
 .../app/coffee/RedisManager.coffee | 22 -------------------
 1 file changed, 22 deletions(-)

diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee
index b08687a170..d280de1cea 100644
--- a/services/document-updater/app/coffee/RedisManager.coffee
+++ b/services/document-updater/app/coffee/RedisManager.coffee
@@ -176,25 +176,3 @@ module.exports = RedisManager =
 
   getDocIdsInProject: (project_id, callback = (error, doc_ids) ->) ->
     rclient.smembers keys.docsInProject(project_id: project_id), callback
-
-
-getDocumentsProjectId = (doc_id, callback)->
-  rclient.get keys.projectKey({doc_id:doc_id}), (err, project_id)->
-    callback err, {doc_id:doc_id, project_id:project_id}
-
-getAllProjectDocsIds = (project_id, callback)->
-  rclient.SMEMBERS keys.docsInProject(project_id:project_id), (err, doc_ids)->
-    if callback?
-      callback(err, doc_ids)
-
-getDocumentsAndExpire = (doc_ids, callback)->
-  multi = rclient.multi()
-  oneDay = 86400
-  doc_ids.forEach (doc_id)->
-    # rclient.expire keys.docLines(doc_id:doc_id), oneDay, ->
-  doc_ids.forEach (doc_id)->
-    multi.get keys.docLines(doc_id:doc_id)
-  multi.exec (err, docsLines)->
-    callback err, docsLines
-
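
Taken together, the series relies on a single compatibility invariant: a doclines key may now hold either plain JSON or a gzipped blob, and readers decide which by checking the gzip magic bytes (0x1F, 0x8B) rather than any out-of-band flag, so values written before and after the rollout can coexist. A standalone round-trip sketch of that read path (not part of the patch series; decode and looksGzipped are illustrative names mirroring ZipManager.uncompressIfNeeded, and new Buffer is the 2015-era Node API):

zlib = require('zlib')

looksGzipped = (buf) -> buf? and buf[0] == 0x1F and buf[1] == 0x8B

# Dispatch on the gzip magic bytes, as ZipManager.uncompressIfNeeded does,
# so plain and compressed values under the same key both decode correctly.
decode = (buf, callback) ->
  if looksGzipped(buf)
    zlib.gunzip buf, (err, plain) ->
      return callback(err) if err?
      callback null, plain.toString()
  else
    callback null, buf.toString()

text = JSON.stringify(["hello", "world"])

# a value written while compression was disabled: plain JSON bytes
decode new Buffer(text), (err, out) ->
  console.log 'plain:', out

# a value written while compression was enabled: a gzipped blob
zlib.gzip text, (err, gzipped) ->
  decode gzipped, (err, out) ->
    console.log 'gzipped:', out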