Remove unused gzipping logic

James Allen 2016-06-01 10:53:50 +01:00
parent 1d7cbd8a78
commit 32ce44f9c6
3 changed files with 13 additions and 261 deletions

View file

@@ -6,12 +6,10 @@ _ = require('underscore')
keys = require('./RedisKeyBuilder')
logger = require('logger-sharelatex')
metrics = require('./Metrics')
ZipManager = require('./ZipManager')
Errors = require "./Errors"
redisOptions = _.clone(Settings.redis.web)
redisOptions.return_buffers = true
rclientBuffer = redis.createClient(redisOptions)
# Make times easy to read
minutes = 60 # seconds for Redis expire
@@ -48,21 +46,18 @@ module.exports = RedisManager =
	getDoc : (doc_id, callback = (error, lines, version) ->)->
		timer = new metrics.Timer("redis.get-doc")
		# use Buffer when retrieving data as it may be gzipped
		multi = rclientBuffer.multi()
		linesKey = keys.docLines(doc_id:doc_id)
		multi.get linesKey
		multi = rclient.multi()
		multi.get keys.docLines(doc_id:doc_id)
		multi.get keys.docVersion(doc_id:doc_id)
		multi.exec (error, result)->
			timer.done()
			return callback(error) if error?
			ZipManager.uncompressIfNeeded doc_id, result, (error, result) ->
				try
					docLines = JSON.parse result[0]
				catch e
					return callback(e)
				version = parseInt(result[1] or 0, 10)
				callback null, docLines, version
			try
				docLines = JSON.parse result[0]
			catch e
				return callback(e)
			version = parseInt(result[1] or 0, 10)
			callback null, docLines, version

	getDocVersion: (doc_id, callback = (error, version) ->) ->
		rclient.get keys.docVersion(doc_id: doc_id), (error, version) ->
@@ -76,12 +71,11 @@ module.exports = RedisManager =
			callback null, len

	setDocument : (doc_id, docLines, version, callback = (error) ->)->
		ZipManager.compressIfNeeded doc_id, JSON.stringify(docLines), (err, result) ->
			multi = rclient.multi()
			multi.set keys.docLines(doc_id:doc_id), result
			multi.set keys.docVersion(doc_id:doc_id), version
			multi.incr keys.now("docsets")
			multi.exec (error, replys) -> callback(error)
		multi = rclient.multi()
		multi.set keys.docLines(doc_id:doc_id), JSON.stringify(docLines)
		multi.set keys.docVersion(doc_id:doc_id), version
		multi.incr keys.now("docsets")
		multi.exec (error, replys) -> callback(error)

	getPendingUpdatesForDoc : (doc_id, callback)->
		multi = rclient.multi()

View file

@@ -1,73 +0,0 @@
Settings = require('settings-sharelatex')
logger = require('logger-sharelatex')
metrics = require('./Metrics')
zlib = require('zlib')
# Compress and uncompress data sent to Redis using the node 'zlib'
# module, to reduce load on Redis.
#
# Measurements show that most of the load on Redis comes from a very
# large documents. We can shift some of that CPU load from redis to
# the docupdaters (which are scalable) by compressing the data in the
# docupdater first.
#
# To avoid overloading the docupdater clients we impose a minimum size
# on data we will compress, so we only catch the large ones.
#
# The optimimum size for the cutoff is about 10K, below this we do
# more work but don't really gain any extra reduction in Redis CPU
#
# |--------------------+-----------+--------------------------|
# | Compression cutoff | Redis CPU | Extra doc updater CPU(*) |
# |--------------------+-----------+--------------------------|
# | N/A                | 100%      | 0%                       |
# | 100k               | 80%       | 10%                      |
# | 10k                | 50%       | 30%                      |
# |--------------------+-----------+--------------------------|
#
# (*) percentage of a single core, because node zlib runs in multiple
# threads.
ZIP_WRITES_ENABLED = Settings.redis.zip?.writesEnabled
ZIP_MINSIZE = Settings.redis.zip?.minSize || 64*1024
module.exports = ZipManager =
	uncompressIfNeeded: (doc_id, result, callback) ->
		# result is an array of [text, version]. Each entry is a node
		# Buffer object which we need to convert to strings on output

		# first make sure the version (result[1]) is returned as a string
		if result?[1]?.toString?
			result[1] = result[1].toString()

		# now uncompress the text (result[0]) if needed
		buf = result?[0]

		# Check if we have a GZIP file (magic numbers in header)
		if buf? and buf[0] == 0x1F and buf[1] == 0x8B
			zlib.gunzip buf, (err, newbuf) ->
				if err?
					logger.err doc_id:doc_id, err:err, "error uncompressing doc"
					callback(err, null)
				else
					logger.log doc_id:doc_id, fromBytes: buf.length, toChars: newbuf.length, factor: buf.length/newbuf.length, "uncompressed successfully"
					result[0] = newbuf.toString()
					callback(null, result)
		else
			# if we don't have a GZIP file it's just a buffer of text, convert it back to a string
			if buf?.toString?
				result[0] = buf.toString()
			callback(null, result)

	compressIfNeeded: (doc_id, text, callback) ->
		if ZIP_WRITES_ENABLED and ZIP_MINSIZE > 1024 and text.length > ZIP_MINSIZE
			# N.B. skip files of 1k or less, because gzip increases the size
			zlib.gzip text, (err, buf) ->
				if err?
					logger.err doc_id:doc_id, err:err, "error compressing doc"
					callback(err, null)
				else
					logger.log doc_id:doc_id, fromChars: text.length, toBytes: buf.length, factor: buf.length/text.length, "compressed successfully"
					callback(null, buf)
		else
			callback(null, text)
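
Note: the pattern being deleted here — gzip a value only when it is above a size cutoff before writing it to Redis, and on read detect compressed values by the GZIP magic bytes (0x1F, 0x8B) so compressed and plain entries can coexist — can be sketched in a few lines with node's zlib. This is an illustrative sketch only, not the removed ZipManager API; MIN_SIZE, maybeCompress and maybeUncompress are made-up names for the example.

zlib = require('zlib')

MIN_SIZE = 10 * 1024  # illustrative cutoff; below ~10k compression buys little

# Compress only values large enough to be worth the CPU; pass small ones through.
maybeCompress = (text, callback) ->
	return callback(null, text) if text.length <= MIN_SIZE
	zlib.gzip text, callback

# Accept either a gzipped Buffer or a plain-text Buffer (as returned by a redis
# client configured with return_buffers: true) and always call back with a string.
maybeUncompress = (buf, callback) ->
	if buf[0] == 0x1F and buf[1] == 0x8B  # GZIP magic bytes
		zlib.gunzip buf, (err, out) ->
			return callback(err) if err?
			callback null, out.toString()
	else
		callback null, buf.toString()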

View file

@@ -1,169 +0,0 @@
sinon = require('sinon')
chai = require('chai')
should = chai.should()
zipModulePath = "../../../../app/js/ZipManager"
redisModulePath = "../../../../app/js/RedisManager"
SandboxedModule = require('sandboxed-module')
zlib = require('zlib')
MIN_SIZE = 9999
describe "ZipManager with RedisManager", ->
describe "for a small document (uncompressed)", ->
rclient = null
beforeEach (done) ->
@ZipManager = SandboxedModule.require zipModulePath, requires:
"logger-sharelatex": log:->
'settings-sharelatex': redis:
web:
host: 'none'
port: 'none'
zip:
writesEnabled: true
minSize: MIN_SIZE
@RedisManager = SandboxedModule.require redisModulePath, requires:
"./ZipManager" : @ZipManager
"redis-sharelatex" : createClient: () =>
rclient ?=
auth:-> # only assign one rclient
multi: () => rclient
set: (key, value) => rclient.store[key] = value
get: (key) => rclient.results.push rclient.store[key]
incr: (key) => rclient.store[key]++
exec: (callback) =>
callback.apply(null, [null, rclient.results])
rclient.results = []
store: {}
results: []
"logger-sharelatex": {}
@doc_id = "document-id"
@version = 123
@docLines = ["hello", "world"]
@callback = sinon.stub()
@RedisManager.setDocument @doc_id, @docLines, @version, () =>
@callback()
done()
it "should set the document", ->
rclient.store['doclines:document-id']
.should.equal JSON.stringify(@docLines)
it "should return the callback", ->
@callback.called.should.equal true
it "should get the document back again", (done) ->
@RedisManager.getDoc @doc_id, (err, lines, version) =>
@docLines.should.eql lines
done()
describe "calling node zlib.gzip directly", ->
it "should compress the string 'hello world' within the timeout", (done) ->
zlib.gzip "hello world", done
it "should compress a 10k string within the timeout", (done) ->
text = ""
while text.length < 10*1024
text = text + "helloworld"
zlib.gzip text, done
describe "for a large document (with compression enabled)", ->
rclient = null
beforeEach (done) ->
@ZipManager = SandboxedModule.require zipModulePath, requires:
"logger-sharelatex": log:->
'settings-sharelatex': redis:
web:
host: 'none'
port: 'none'
zip:
writesEnabled: true
minSize: MIN_SIZE
@RedisManager = SandboxedModule.require redisModulePath, requires:
"./ZipManager" : @ZipManager
"redis-sharelatex" : createClient: () =>
rclient ?=
auth:-> # only assign one rclient
multi: () => rclient
set: (key, value) => rclient.store[key] = value
get: (key) => rclient.results.push rclient.store[key]
incr: (key) => rclient.store[key]++
exec: (callback) =>
callback.apply(null, [null, rclient.results])
rclient.results = []
store: {}
results: []
"logger-sharelatex": {}
@doc_id = "document-id"
@version = 123
@docLines = []
while @docLines.join('').length <= MIN_SIZE
@docLines.push "this is a long line in a long document"
# console.log "length of doclines", @docLines.join('').length
@callback = sinon.stub()
@RedisManager.setDocument @doc_id, @docLines, @version, () =>
@callback()
done()
it "should set the document as a gzipped blob", ->
rclient.store['doclines:document-id']
.should.not.equal JSON.stringify(@docLines)
it "should return the callback", ->
@callback.called.should.equal true
it "should get the uncompressed document back again", (done) ->
@RedisManager.getDoc @doc_id, (err, lines, version) =>
@docLines.should.eql lines
done()
describe "for a large document (with compression disabled)", ->
rclient = null
beforeEach (done) ->
@ZipManager = SandboxedModule.require zipModulePath, requires:
"logger-sharelatex": log:->
'settings-sharelatex': redis:
web:
host: 'none'
port: 'none'
zip:
writesEnabled: false
minSize: MIN_SIZE
@RedisManager = SandboxedModule.require redisModulePath, requires:
"./ZipManager" : @ZipManager
"redis-sharelatex" : createClient: () =>
rclient ?=
auth:-> # only assign one rclient
multi: () => rclient
set: (key, value) => rclient.store[key] = value
get: (key) => rclient.results.push rclient.store[key]
incr: (key) => rclient.store[key]++
exec: (callback) =>
callback.apply(null, [null, rclient.results])
rclient.results = []
store: {}
results: []
"logger-sharelatex": {}
@doc_id = "document-id"
@version = 123
@docLines = []
while @docLines.join('').length <= MIN_SIZE
@docLines.push "this is a long line in a long document"
@callback = sinon.stub()
@RedisManager.setDocument @doc_id, @docLines, @version, () =>
@callback()
done()
it "should set the document", ->
rclient.store['doclines:document-id']
.should.equal JSON.stringify(@docLines)
it "should return the callback", ->
@callback.called.should.equal true
it "should get the document back again", (done) ->
@RedisManager.getDoc @doc_id, (err, lines, version) =>
@docLines.should.eql lines
done()