Compress doc ops in two passes

This commit is contained in:
James Allen 2013-08-23 14:35:13 +01:00
parent bef05c31f4
commit dbee4a57fb
5 changed files with 145 additions and 47 deletions

View file

@ -0,0 +1,19 @@
express = require "express"
app = express()
ConversoinManager = require "./app/js/ConversionManager"
logger = require "logger-sharelatex"
logger.initialize("history")
app.post "/doc/:doc_id/flush", (req, res, next) ->
project_id = req.params.project_id
logger.log doc_id: doc_id, "compressing doc history"
ConversionManager.convertOldRawUpdates doc_id, (error) ->
return next(error) if error?
res.send 204 # No content
app.use (error, req, res, next) ->
logger.error err: error, "an internal error occured"
req.send 500
app.listen(3014)

View file

@ -13,9 +13,30 @@ module.exports = ConcatManager =
user_id: update.meta.user_id user_id: update.meta.user_id
return updates return updates
compressUpdates: (rawUpdates) ->
return [] if rawUpdates.length == 0
firstPass = [rawUpdates.shift()]
for update in rawUpdates
lastCompressedUpdate = firstPass.pop()
if lastCompressedUpdate?
firstPass = firstPass.concat ConcatManager._concatTwoUpdatesOfTheSameType lastCompressedUpdate, update
else
firstPass.push rawUpdate
return [] if firstPass.length == 0
secondPass = [firstPass.shift()]
for update in firstPass
lastCompressedUpdate = secondPass.pop()
if lastCompressedUpdate?
secondPass = secondPass.concat ConcatManager._cancelOppositeInsertsAndDeletes lastCompressedUpdate, update
else
secondPass.push update
return secondPass
MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000 MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000
concatTwoUpdates: (firstUpdate, secondUpdate) -> _concatTwoUpdatesOfTheSameType: (firstUpdate, secondUpdate) ->
firstUpdate = firstUpdate =
op: firstUpdate.op op: firstUpdate.op
meta: meta:
@ -61,8 +82,34 @@ module.exports = ConcatManager =
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d) d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
] ]
] ]
else
return [firstUpdate, secondUpdate]
_cancelOppositeInsertsAndDeletes: (firstUpdate, secondUpdate) ->
firstUpdate =
op: firstUpdate.op
meta:
user_id: firstUpdate.meta.user_id or null
start_ts: firstUpdate.meta.start_ts or firstUpdate.meta.ts
end_ts: firstUpdate.meta.end_ts or firstUpdate.meta.ts
secondUpdate =
op: secondUpdate.op
meta:
user_id: secondUpdate.meta.user_id or null
start_ts: secondUpdate.meta.start_ts or secondUpdate.meta.ts
end_ts: secondUpdate.meta.end_ts or secondUpdate.meta.ts
if firstUpdate.meta.user_id != secondUpdate.meta.user_id
return [firstUpdate, secondUpdate]
if secondUpdate.meta.start_ts - firstUpdate.meta.end_ts > ConcatManager.MAX_TIME_BETWEEN_UPDATES
return [firstUpdate, secondUpdate]
firstOp = firstUpdate.op[0]
secondOp = secondUpdate.op[0]
# An insert and then a delete # An insert and then a delete
else if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length) if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length)
offset = secondOp.p - firstOp.p offset = secondOp.p - firstOp.p
insertedText = firstOp.i.slice(offset, offset + secondOp.d.length) insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
if insertedText == secondOp.d if insertedText == secondOp.d

View file

@ -1,8 +1,9 @@
{db, ObjectId} = require "./mongojs" {db, ObjectId} = require "./mongojs"
ConcatManager = require "./ConcatManager" ConcatManager = require "./ConcatManager"
logger = require "logger-sharelatex"
module.exports = ConversionManager = module.exports = ConversionManager =
OPS_TO_LEAVE: 10 OPS_TO_LEAVE: 100
popLatestCompressedUpdate: (doc_id, callback = (error, update) ->) -> popLatestCompressedUpdate: (doc_id, callback = (error, update) ->) ->
db.docHistory.findAndModify db.docHistory.findAndModify
@ -25,12 +26,19 @@ module.exports = ConversionManager =
tailVersion = doc.tailVersion or 0 tailVersion = doc.tailVersion or 0
if currentVersion - tailVersion > ConversionManager.OPS_TO_LEAVE if currentVersion - tailVersion > ConversionManager.OPS_TO_LEAVE
db.docOps.findAndModify db.docOps.findAndModify
query: { doc_id: ObjectId(doc_id), version: currentVersion } query:
update: { doc_id: ObjectId(doc_id)
$push: { docOps: { $each: [], $slice: - ConversionManager.OPS_TO_LEAVE } } version: currentVersion
$set: tailVersion: currentVersion - ConversionManager.OPS_TO_LEAVE, update:
} $push:
fields: { docOps: $slice: currentVersion - tailVersion - ConversionManager.OPS_TO_LEAVE } docOps:
$each: []
$slice: - ConversionManager.OPS_TO_LEAVE
$set:
tailVersion: currentVersion - ConversionManager.OPS_TO_LEAVE
fields:
docOps:
$slice: currentVersion - tailVersion - ConversionManager.OPS_TO_LEAVE
, (error, doc) -> , (error, doc) ->
return callback(error) if error? return callback(error) if error?
if !doc? if !doc?
@ -63,15 +71,11 @@ module.exports = ConversionManager =
# No saved versions, no raw updates, nothing to do # No saved versions, no raw updates, nothing to do
return callback() return callback()
compressedUpdates = [lastCompressedUpdate] uncompressedUpdates = [lastCompressedUpdate].concat rawUpdates
for rawUpdate in rawUpdates compressedUpdates = ConcatManager.compressUpdates uncompressedUpdates
lastCompressedUpdate = compressedUpdates.pop()
if lastCompressedUpdate?
compressedUpdates = compressedUpdates.concat ConcatManager.concatTwoUpdates lastCompressedUpdate, rawUpdate
else
compressedUpdates.push rawUpdate
ConversionManager.insertCompressedUpdates doc_id, compressedUpdates, (error) -> ConversionManager.insertCompressedUpdates doc_id, compressedUpdates, (error) ->
return callback(error) if error? return callback(error) if error?
console.log doc_id, "Pushed doc ops", length logger.log doc_id: doc_id, rawOpsLength: length, compressedOpsLength: compressedUpdates.length, "compressed doc ops"
callback() callback()

View file

@ -4,9 +4,11 @@
"dependencies": { "dependencies": {
"async": "", "async": "",
"chai": "", "chai": "",
"express": "3.3.5",
"sandboxed-module": "", "sandboxed-module": "",
"sinon": "", "sinon": "",
"mongojs": "0.7.2", "mongojs": "0.7.2",
"settings": "git+ssh://git@bitbucket.org:sharelatex/settings-sharelatex.git#master" "settings": "git+ssh://git@bitbucket.org:sharelatex/settings-sharelatex.git#master",
"logger": "git+ssh://git@bitbucket.org:sharelatex/logger-sharelatex.git#bunyan"
} }
} }

View file

@ -13,42 +13,42 @@ describe "ConcatManager", ->
@ts1 = Date.now() @ts1 = Date.now()
@ts2 = Date.now() + 1000 @ts2 = Date.now() + 1000
describe "concatTwoUpdates", -> describe "compress", ->
describe "insert - insert", -> describe "insert - insert", ->
it "should append one insert to the other", -> it "should append one insert to the other", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 6, i: "bar" ] op: [ p: 6, i: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "foobar" ] op: [ p: 3, i: "foobar" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should insert one insert inside the other", -> it "should insert one insert inside the other", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 5, i: "bar" ] op: [ p: 5, i: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "fobaro" ] op: [ p: 3, i: "fobaro" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should not append separated inserts", -> it "should not append separated inserts", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 9, i: "bar" ] op: [ p: 9, i: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
@ -59,39 +59,39 @@ describe "ConcatManager", ->
describe "delete - delete", -> describe "delete - delete", ->
it "should append one delete to the other", -> it "should append one delete to the other", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, d: "bar" ] op: [ p: 3, d: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, d: "foobar" ] op: [ p: 3, d: "foobar" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should insert one delete inside the other", -> it "should insert one delete inside the other", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 1, d: "bar" ] op: [ p: 1, d: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 1, d: "bafoor" ] op: [ p: 1, d: "bafoor" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should not append separated deletes", -> it "should not append separated deletes", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 9, d: "bar" ] op: [ p: 9, d: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
@ -102,49 +102,49 @@ describe "ConcatManager", ->
describe "insert - delete", -> describe "insert - delete", ->
it "should undo a previous insert", -> it "should undo a previous insert", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 5, d: "o" ] op: [ p: 5, d: "o" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "fo" ] op: [ p: 3, i: "fo" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should remove part of an insert from the middle", -> it "should remove part of an insert from the middle", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "fobaro" ] op: [ p: 3, i: "fobaro" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 5, d: "bar" ] op: [ p: 5, d: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should cancel out two opposite updates", -> it "should cancel out two opposite updates", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [] .to.deep.equal []
it "should not combine separated updates", -> it "should not combine separated updates", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 9, d: "bar" ] op: [ p: 9, d: "bar" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
@ -155,26 +155,26 @@ describe "ConcatManager", ->
describe "delete - insert", -> describe "delete - insert", ->
it "should redo a previous delete at the beginning", -> it "should redo a previous delete at the beginning", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, i: "f" ] op: [ p: 3, i: "f" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 4, d: "oo" ] op: [ p: 4, d: "oo" ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}] }]
it "should redo a previous delete from halfway through", -> it "should redo a previous delete from halfway through", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foobar" ] op: [ p: 3, d: "foobar" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, i: "oo" ] op: [ p: 3, i: "oo" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, d: "f" ] op: [ p: 3, d: "f" ]
meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
@ -183,14 +183,40 @@ describe "ConcatManager", ->
meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
}] }]
it "should keep words together", ->
expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "abcdefghijklmnopqrstuvwxyz hello world" ]
meta: ts: @ts1, user_id: @user_id
}, {
op: [ p: 3, i: "w" ]
meta: ts: @ts2, user_id: @user_id
}, {
op: [ p: 4, i: "o" ]
meta: ts: @ts2, user_id: @user_id
}, {
op: [ p: 5, i: "r" ]
meta: ts: @ts2, user_id: @user_id
}, {
op: [ p: 6, i: "l" ]
meta: ts: @ts2, user_id: @user_id
}, {
op: [ p: 7, i: "d" ]
meta: ts: @ts2, user_id: @user_id
}])
.to.deep.equal [{
op: [ p: 3, d: "abcdefghijklmnopqrstuvwxyz hello " ]
meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
}]
it "should not combine the ops if the insert text does not match the delete text", -> it "should not combine the ops if the insert text does not match the delete text", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foobar" ] op: [ p: 3, d: "foobar" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, i: "xy" ] op: [ p: 3, i: "xy" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [{ .to.deep.equal [{
op: [ p: 3, d: "foobar" ] op: [ p: 3, d: "foobar" ]
meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
@ -200,13 +226,13 @@ describe "ConcatManager", ->
}] }]
it "should cancel two equal updates", -> it "should cancel two equal updates", ->
expect(@ConcatManager.concatTwoUpdates({ expect(@ConcatManager.compressUpdates [{
op: [ p: 3, d: "foo" ] op: [ p: 3, d: "foo" ]
meta: ts: @ts1, user_id: @user_id meta: ts: @ts1, user_id: @user_id
}, { }, {
op: [ p: 3, i: "foo" ] op: [ p: 3, i: "foo" ]
meta: ts: @ts2, user_id: @user_id meta: ts: @ts2, user_id: @user_id
})) }])
.to.deep.equal [] .to.deep.equal []