diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee
index b7ebb2cd4f..4b66e178e2 100644
--- a/services/document-updater/app/coffee/RedisManager.coffee
+++ b/services/document-updater/app/coffee/RedisManager.coffee
@@ -6,12 +6,10 @@ _ = require('underscore')
 keys = require('./RedisKeyBuilder')
 logger = require('logger-sharelatex')
 metrics = require('./Metrics')
-ZipManager = require('./ZipManager')
 Errors = require "./Errors"
 
 redisOptions = _.clone(Settings.redis.web)
 redisOptions.return_buffers = true
-rclientBuffer = redis.createClient(redisOptions)
 
 # Make times easy to read
 minutes = 60 # seconds for Redis expire
@@ -48,21 +46,18 @@ module.exports = RedisManager =
 
 	getDoc : (doc_id, callback = (error, lines, version) ->)->
 		timer = new metrics.Timer("redis.get-doc")
-		# use Buffer when retrieving data as it may be gzipped
-		multi = rclientBuffer.multi()
-		linesKey = keys.docLines(doc_id:doc_id)
-		multi.get linesKey
+		multi = rclient.multi()
+		multi.get keys.docLines(doc_id:doc_id)
 		multi.get keys.docVersion(doc_id:doc_id)
 		multi.exec (error, result)->
 			timer.done()
 			return callback(error) if error?
-			ZipManager.uncompressIfNeeded doc_id, result, (error, result) ->
-				try
-					docLines = JSON.parse result[0]
-				catch e
-					return callback(e)
-				version = parseInt(result[1] or 0, 10)
-				callback null, docLines, version
+			try
+				docLines = JSON.parse result[0]
+			catch e
+				return callback(e)
+			version = parseInt(result[1] or 0, 10)
+			callback null, docLines, version
 
 	getDocVersion: (doc_id, callback = (error, version) ->) ->
 		rclient.get keys.docVersion(doc_id: doc_id), (error, version) ->
@@ -76,12 +71,11 @@
 			callback null, len
 
 	setDocument : (doc_id, docLines, version, callback = (error) ->)->
-		ZipManager.compressIfNeeded doc_id, JSON.stringify(docLines), (err, result) ->
-			multi = rclient.multi()
-			multi.set keys.docLines(doc_id:doc_id), result
-			multi.set keys.docVersion(doc_id:doc_id), version
-			multi.incr keys.now("docsets")
-			multi.exec (error, replys) -> callback(error)
+		multi = rclient.multi()
+		multi.set keys.docLines(doc_id:doc_id), JSON.stringify(docLines)
+		multi.set keys.docVersion(doc_id:doc_id), version
+		multi.incr keys.now("docsets")
+		multi.exec (error, replys) -> callback(error)
 
 	getPendingUpdatesForDoc : (doc_id, callback)->
 		multi = rclient.multi()
diff --git a/services/document-updater/app/coffee/ZipManager.coffee b/services/document-updater/app/coffee/ZipManager.coffee
deleted file mode 100644
index 18482a5c69..0000000000
--- a/services/document-updater/app/coffee/ZipManager.coffee
+++ /dev/null
@@ -1,73 +0,0 @@
-Settings = require('settings-sharelatex')
-logger = require('logger-sharelatex')
-metrics = require('./Metrics')
-zlib = require('zlib')
-
-# Compress and uncompress data sent to Redis using the node 'zlib'
-# module, to reduce load on Redis.
-#
-# Measurements show that most of the load on Redis comes from very
-# large documents. We can shift some of that CPU load from redis to
-# the docupdaters (which are scalable) by compressing the data in the
-# docupdater first.
-#
-# To avoid overloading the docupdater clients we impose a minimum size
-# on data we will compress, so we only catch the large ones.
-#
-# The optimum size for the cutoff is about 10K, below this we do
-# more work but don't really gain any extra reduction in Redis CPU
-#
-# |--------------------+-----------+--------------------------|
-# | Compression cutoff | Redis CPU | Extra doc updater CPU(*) |
-# |--------------------+-----------+--------------------------|
-# | N/A                | 100%      | 0%                       |
-# | 100k               | 80%       | 10%                      |
-# | 10k                | 50%       | 30%                      |
-# |--------------------+-----------+--------------------------|
-#
-# (*) percentage of a single core, because node zlib runs in multiple
-# threads.
-
-ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled
-ZIP_MINSIZE = Settings.redis.zip?.minSize || 64*1024
-
-module.exports = ZipManager =
-	uncompressIfNeeded: (doc_id, result, callback) ->
-		# result is an array of [text, version]. Each entry is a node
-		# Buffer object which we need to convert to strings on output
-
-		# first make sure the version (result[1]) is returned as a string
-		if result?[1]?.toString?
-			result[1] = result[1].toString()
-
-		# now uncompress the text (result[0]) if needed
-		buf = result?[0]
-
-		# Check if we have a GZIP file (magic numbers in header)
-		if buf? and buf[0] == 0x1F and buf[1] == 0x8B
-			zlib.gunzip buf, (err, newbuf) ->
-				if err?
-					logger.err doc_id:doc_id, err:err, "error uncompressing doc"
-					callback(err, null)
-				else
-					logger.log doc_id:doc_id, fromBytes: buf.length, toChars: newbuf.length, factor: buf.length/newbuf.length, "uncompressed successfully"
-					result[0] = newbuf.toString()
-					callback(null, result)
-		else
-			# if we don't have a GZIP file it's just a buffer of text, convert it back to a string
-			if buf?.toString?
-				result[0] = buf.toString()
-			callback(null, result)
-
-	compressIfNeeded: (doc_id, text, callback) ->
-		if ZIP_WRITES_ENABLED and ZIP_MINSIZE > 1024 and text.length > ZIP_MINSIZE
-			# N.B. skip files of 1k or less, because gzip increases the size
-			zlib.gzip text, (err, buf) ->
-				if err?
-					logger.err doc_id:doc_id, err:err, "error compressing doc"
-					callback(err, null)
-				else
-					logger.log doc_id:doc_id, fromChars: text.length, toBytes: buf.length, factor: buf.length/text.length , "compressed successfully"
-					callback(null, buf)
-		else
-			callback(null, text)
diff --git a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee b/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
deleted file mode 100644
index e477cfb23a..0000000000
--- a/services/document-updater/test/unit/coffee/ZipManager/ZipManager.coffee
+++ /dev/null
@@ -1,169 +0,0 @@
-sinon = require('sinon')
-chai = require('chai')
-should = chai.should()
-zipModulePath = "../../../../app/js/ZipManager"
-redisModulePath = "../../../../app/js/RedisManager"
-SandboxedModule = require('sandboxed-module')
-zlib = require('zlib')
-
-MIN_SIZE = 9999
-
-describe "ZipManager with RedisManager", ->
-	describe "for a small document (uncompressed)", ->
-		rclient = null
-		beforeEach (done) ->
-			@ZipManager = SandboxedModule.require zipModulePath, requires:
-				"logger-sharelatex": log:->
-				'settings-sharelatex': redis:
-					web:
-						host: 'none'
-						port: 'none'
-					zip:
-						writesEnabled: true
-						minSize: MIN_SIZE
-			@RedisManager = SandboxedModule.require redisModulePath, requires:
-				"./ZipManager" : @ZipManager
-				"redis-sharelatex" : createClient: () =>
-					rclient ?=
-						auth:-> # only assign one rclient
-						multi: () => rclient
-						set: (key, value) => rclient.store[key] = value
-						get: (key) => rclient.results.push rclient.store[key]
-						incr: (key) => rclient.store[key]++
-						exec: (callback) =>
-							callback.apply(null, [null, rclient.results])
-							rclient.results = []
-						store: {}
-						results: []
-				"logger-sharelatex": {}
-			@doc_id = "document-id"
-			@version = 123
-
-			@docLines = ["hello", "world"]
-			@callback = sinon.stub()
-
-			@RedisManager.setDocument @doc_id, @docLines, @version, () =>
-				@callback()
-				done()
-
-		it "should set the document", ->
-			rclient.store['doclines:document-id']
-				.should.equal JSON.stringify(@docLines)
-
-		it "should return the callback", ->
-			@callback.called.should.equal true
-
-		it "should get the document back again", (done) ->
-			@RedisManager.getDoc @doc_id, (err, lines, version) =>
-				@docLines.should.eql lines
-				done()
-
-	describe "calling node zlib.gzip directly", ->
-		it "should compress the string 'hello world' within the timeout", (done) ->
-			zlib.gzip "hello world", done
-
-		it "should compress a 10k string within the timeout", (done) ->
-			text = ""
-			while text.length < 10*1024
-				text = text + "helloworld"
-			zlib.gzip text, done
-
-	describe "for a large document (with compression enabled)", ->
-		rclient = null
-		beforeEach (done) ->
-			@ZipManager = SandboxedModule.require zipModulePath, requires:
-				"logger-sharelatex": log:->
-				'settings-sharelatex': redis:
-					web:
-						host: 'none'
-						port: 'none'
-					zip:
-						writesEnabled: true
-						minSize: MIN_SIZE
-			@RedisManager = SandboxedModule.require redisModulePath, requires:
-				"./ZipManager" : @ZipManager
-				"redis-sharelatex" : createClient: () =>
-					rclient ?=
-						auth:-> # only assign one rclient
-						multi: () => rclient
-						set: (key, value) => rclient.store[key] = value
-						get: (key) => rclient.results.push rclient.store[key]
-						incr: (key) => rclient.store[key]++
-						exec: (callback) =>
-							callback.apply(null, [null, rclient.results])
-							rclient.results = []
-						store: {}
-						results: []
-				"logger-sharelatex": {}
-			@doc_id = "document-id"
-			@version = 123
-
-			@docLines = []
-			while @docLines.join('').length <= MIN_SIZE
-				@docLines.push "this is a long line in a long document"
in a long document" - # console.log "length of doclines", @docLines.join('').length - @callback = sinon.stub() - @RedisManager.setDocument @doc_id, @docLines, @version, () => - @callback() - done() - - it "should set the document as a gzipped blob", -> - rclient.store['doclines:document-id'] - .should.not.equal JSON.stringify(@docLines) - - it "should return the callback", -> - @callback.called.should.equal true - - it "should get the uncompressed document back again", (done) -> - @RedisManager.getDoc @doc_id, (err, lines, version) => - @docLines.should.eql lines - done() - - describe "for a large document (with compression disabled)", -> - rclient = null - beforeEach (done) -> - @ZipManager = SandboxedModule.require zipModulePath, requires: - "logger-sharelatex": log:-> - 'settings-sharelatex': redis: - web: - host: 'none' - port: 'none' - zip: - writesEnabled: false - minSize: MIN_SIZE - @RedisManager = SandboxedModule.require redisModulePath, requires: - "./ZipManager" : @ZipManager - "redis-sharelatex" : createClient: () => - rclient ?= - auth:-> # only assign one rclient - multi: () => rclient - set: (key, value) => rclient.store[key] = value - get: (key) => rclient.results.push rclient.store[key] - incr: (key) => rclient.store[key]++ - exec: (callback) => - callback.apply(null, [null, rclient.results]) - rclient.results = [] - store: {} - results: [] - "logger-sharelatex": {} - @doc_id = "document-id" - @version = 123 - @docLines = [] - while @docLines.join('').length <= MIN_SIZE - @docLines.push "this is a long line in a long document" - @callback = sinon.stub() - @RedisManager.setDocument @doc_id, @docLines, @version, () => - @callback() - done() - - it "should set the document", -> - rclient.store['doclines:document-id'] - .should.equal JSON.stringify(@docLines) - - it "should return the callback", -> - @callback.called.should.equal true - - it "should get the document back again", (done) -> - @RedisManager.getDoc @doc_id, (err, lines, version) => - @docLines.should.eql lines - done()