Initial commit of script for compressing one doc's history

This commit is contained in:
James Allen 2013-08-08 16:42:14 +01:00
commit 7e96933cf2
8 changed files with 547 additions and 0 deletions

4
services/track-changes/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
**.swp
node_modules/
app/js
test/unit/js

View file

@ -0,0 +1,121 @@
# Insert string s2 into s1 at position pos.
strInject = (s1, pos, s2) -> s1[...pos] + s2 + s1[pos..]

# Remove `length` characters from s1 starting at position pos.
strRemove = (s1, pos, length) -> s1[...pos] + s1[(pos + length)..]

module.exports = ConcatManager =
	# Split a raw update (one `ts` timestamp, possibly several ops) into a
	# list of single-op updates, each carrying a start_ts/end_ts range so
	# they can be merged by concatTwoUpdates.
	normalizeUpdate: (update) ->
		updates = []
		for op in update.op
			updates.push
				op: [op]
				meta:
					start_ts: update.meta.ts
					end_ts: update.meta.ts
					user_id: update.meta.user_id
		return updates

	# Updates further apart than this (in ms) are never merged.
	MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000

	# Try to merge two single-op updates. Returns an array of 0, 1 or 2
	# updates: [] when the ops cancel out exactly, [merged] when they can be
	# combined, or [firstUpdate, secondUpdate] (with normalized meta) when
	# they cannot.
	concatTwoUpdates: (firstUpdate, secondUpdate) ->
		# Normalize meta: raw updates only have `ts`, compressed ones have
		# start_ts/end_ts. After this, both shapes are uniform.
		firstUpdate =
			op: firstUpdate.op
			meta:
				user_id: firstUpdate.meta.user_id
				start_ts: firstUpdate.meta.start_ts or firstUpdate.meta.ts
				end_ts: firstUpdate.meta.end_ts or firstUpdate.meta.ts
		secondUpdate =
			op: secondUpdate.op
			meta:
				user_id: secondUpdate.meta.user_id
				start_ts: secondUpdate.meta.start_ts or secondUpdate.meta.ts
				end_ts: secondUpdate.meta.end_ts or secondUpdate.meta.ts

		# Only merge updates from the same user...
		if firstUpdate.meta.user_id != secondUpdate.meta.user_id
			return [firstUpdate, secondUpdate]
		# ...that happened close together in time.
		if secondUpdate.meta.start_ts - firstUpdate.meta.end_ts > ConcatManager.MAX_TIME_BETWEEN_UPDATES
			return [firstUpdate, secondUpdate]

		firstOp = firstUpdate.op[0]
		secondOp = secondUpdate.op[0]

		# Two inserts, where the second lands inside (or abuts) the first.
		if firstOp.i? and secondOp.i? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length)
			return [
				meta:
					start_ts: firstUpdate.meta.start_ts
					end_ts: secondUpdate.meta.end_ts
					user_id: firstUpdate.meta.user_id
				op: [
					p: firstOp.p
					i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
				]
			]
		# Two deletes, where the first lands inside (or abuts) the second.
		else if firstOp.d? and secondOp.d? and secondOp.p <= firstOp.p <= (secondOp.p + secondOp.d.length)
			return [
				meta:
					start_ts: firstUpdate.meta.start_ts
					end_ts: secondUpdate.meta.end_ts
					user_id: firstUpdate.meta.user_id
				op: [
					p: secondOp.p
					d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
				]
			]
		# An insert and then a delete of (part of) the inserted text.
		else if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length)
			offset = secondOp.p - firstOp.p
			insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
			if insertedText == secondOp.d
				insert = strRemove(firstOp.i, offset, secondOp.d.length)
				# The delete undid the insert entirely: no update remains.
				return [] if insert == ""
				return [
					meta:
						start_ts: firstUpdate.meta.start_ts
						end_ts: secondUpdate.meta.end_ts
						user_id: firstUpdate.meta.user_id
					op: [
						p: firstOp.p
						i: insert
					]
				]
			else
				# The delete overlaps the insert but removes different text.
				# This shouldn't be possible!
				return [firstUpdate, secondUpdate]
		# A delete and then an insert of part of the deleted text back at the
		# same position (e.g. an undo). The re-inserted text splits the delete.
		else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p
			offset = firstOp.d.indexOf(secondOp.i)
			if offset == -1
				return [firstUpdate, secondUpdate]
			# NOTE: removed a dead assignment here (`inserted = firstOp.d.slice(
			# offset, secondOp.i.length)`) — it was never used, and its slice end
			# was missing the `offset +` term.
			headD = firstOp.d.slice(0, offset)
			tailD = firstOp.d.slice(offset + secondOp.i.length)
			headP = firstOp.p
			tailP = firstOp.p + secondOp.i.length
			updates = []
			if headD != ""
				updates.push
					meta:
						start_ts: firstUpdate.meta.start_ts
						end_ts: secondUpdate.meta.end_ts
						user_id: firstUpdate.meta.user_id
					op: [
						p: headP
						d: headD
					]
			if tailD != ""
				updates.push
					meta:
						start_ts: firstUpdate.meta.start_ts
						end_ts: secondUpdate.meta.end_ts
						user_id: firstUpdate.meta.user_id
					op: [
						p: tailP
						d: tailD
					]
			if updates.length == 2
				# When the delete is split in two, attribute each piece to one
				# original update's timestamp rather than the merged range.
				updates[0].meta.start_ts = updates[0].meta.end_ts = firstUpdate.meta.start_ts
				updates[1].meta.start_ts = updates[1].meta.end_ts = secondUpdate.meta.end_ts
			return updates
		else
			return [firstUpdate, secondUpdate]

View file

@ -0,0 +1,8 @@
# Shared mongojs connection for the history scripts, exposing the two
# collections they use: docHistory (compressed ops) and docOps (raw ops).
Settings = require "settings-sharelatex"
mongojs = require "mongojs"

# Connects at require time using the configured Mongo URL.
db = mongojs.connect(Settings.mongo.url, ["docHistory", "docOps"])

module.exports =
	db: db
	# Re-exported so callers can build ObjectIds without requiring mongojs themselves.
	ObjectId: mongojs.ObjectId

View file

@ -0,0 +1,74 @@
{db, ObjectId} = require "./app/js/mongojs"
ConcatManager = require "./app/js/ConcatManager"

# One-off script: compress the raw update history of a single doc, one raw
# update at a time, folding each into the compressed history in docHistory.
doc_id = process.argv.pop()
console.log "DOC ID", doc_id

# Keep this many of the most recent raw updates uncompressed.
OPS_TO_LEAVE = 10

# Remove the most recent compressed update from docHistory ($pop from the end).
removeLatestCompressedUpdate = (doc_id, callback = (error) ->) ->
	db.docHistory.update { doc_id: ObjectId(doc_id) }, { $pop: { docOps: 1 } }, callback

# Fetch the most recent compressed update, or undefined if there is none yet.
getLatestCompressedUpdate = (doc_id, callback = (error) ->) ->
	db.docHistory.find { doc_id: ObjectId(doc_id) }, { docOps: { $slice: -1 } }, (error, history) ->
		return callback(error) if error?
		history = history[0] or { docOps: [] }
		callback null, history.docOps.slice(-1)[0]

# Append compressed updates to docHistory, creating the document if needed.
insertCompressedUpdates = (doc_id, updates, callback = (error) ->) ->
	db.docHistory.update { doc_id: ObjectId(doc_id) }, { $push: { docOps: { $each: updates } } }, { upsert: true }, callback

# Drop the oldest raw update from docOps and advance tailVersion past it.
trimLastRawUpdate = (doc_id, tailVersion, callback = (error) ->) ->
	db.docOps.update { doc_id: ObjectId(doc_id) }, { $pop: { docOps: -1 }, $set: { tailVersion: tailVersion + 1 } }, callback

# Print the resulting compressed history and exit.
done = () ->
	console.log "DONE! Here's the history:"
	db.docHistory.find { doc_id: ObjectId(doc_id) }, (error, docs) ->
		throw error if error?
		doc = docs[0]
		for update in doc.docOps
			op = update.op[0]
			if op.i?
				console.log update.meta.start_ts, update.meta.end_ts, update.meta.user_id, "INSERT", op.p, op.i
			else if op.d?
				console.log update.meta.start_ts, update.meta.end_ts, update.meta.user_id, "DELETE", op.p, op.d
		process.exit()

# Main loop: repeatedly take the oldest raw update and fold it into the
# compressed history until only OPS_TO_LEAVE raw updates remain.
do next = () ->
	db.docOps.find { doc_id: ObjectId(doc_id) }, { version: true, tailVersion: true, docOps: { $slice: 1 } }, (error, docs) ->
		throw error if error?
		throw "doc not found" if docs.length < 1
		doc = docs[0]
		tailVersion = doc.tailVersion or 0
		version = doc.version
		rawUpdate = doc.docOps[0]
		rawUpdates = ConcatManager.normalizeUpdate(rawUpdate)
		if version - tailVersion > OPS_TO_LEAVE
			getLatestCompressedUpdate doc_id, (error, lastCompressedUpdate) ->
				throw error if error?
				if lastCompressedUpdate?
					# Merge each raw op into the tail of the compressed history.
					compressedUpdates = [lastCompressedUpdate]
					for rawUpdate in rawUpdates
						lastCompressedUpdate = compressedUpdates.pop()
						compressedUpdates = compressedUpdates.concat ConcatManager.concatTwoUpdates lastCompressedUpdate, rawUpdate
					# Replace the old tail with the merged result.
					removeLatestCompressedUpdate doc_id, (error) ->
						throw error if error?
						insertCompressedUpdates doc_id, compressedUpdates, (error) ->
							throw error if error?
							trimLastRawUpdate doc_id, tailVersion, (error) ->
								throw error if error?
								console.log "Pushed compressed op"
								next()
				else
					# No compressed history yet: seed it with the raw updates.
					insertCompressedUpdates doc_id, rawUpdates, (error) ->
						throw error if error? # FIX: this error was previously ignored
						trimLastRawUpdate doc_id, tailVersion, (error) ->
							throw error if error?
							console.log "Pushed first op"
							next()
		else
			console.log "Up to date"
			done()

View file

@ -0,0 +1,3 @@
# Minimal settings for running the history scripts locally.
module.exports =
	mongo:
		# Local test database; override for other environments.
		url: 'mongodb://127.0.0.1/sharelatexTesting'

View file

@ -0,0 +1,11 @@
{
"name": "history-sharelatex",
"version": "0.0.1",
"dependencies": {
"chai": "",
"sandboxed-module": "",
"sinon": "",
"mongojs": "0.7.2",
"settings-sharelatex": "git+ssh://git@bitbucket.org:sharelatex/settings-sharelatex.git#master"
}
}

View file

@ -0,0 +1,112 @@
require 'fileutils'

# NOTE(review): siteurl appears unused in this Rakefile — confirm before removing.
siteurl = "https://www.sharelatex.com"

namespace 'setup' do
  desc "installs npm packages and global tools (coffee-script)"
  task :installDependencys do
    sh %{npm install}
    sh %{npm install -g coffee-script}
    # Pull in any git submodule dependencies as well.
    sh %{git submodule init}
    sh %{git submodule update}
  end
end
namespace 'run' do
  desc "compiles and runs the javascript version of the app"
  task :app => ["compile:app"] do
    # Stream app output through bunyan for readable logs.
    sh %{node app.js | bunyan} do |ok, res|
      if ! ok
        # FIX: message previously said "error compiling app folder tests".
        raise "error running app: #{res}"
      end
      puts 'app exited cleanly'
    end
  end
end
namespace 'compile' do
  desc "compiles main app folder"
  task :app do
    puts "Compiling app folder to JS"
    # Remove stale compiled output before recompiling.
    FileUtils.rm_rf "app/js"
    sh %{coffee -c -o app/js/ app/coffee/} do |ok, res|
      if ! ok
        raise "error compiling app folder: #{res}"
      end
      puts 'finished app compile'
    end
  end

  desc "compiles unit tests"
  task :unittests => ["compile:app"] do
    puts "Compiling Unit Tests to JS"
    # FIX: previously used backticks, which silently ignored compile failures.
    sh %{coffee -c -o test/unit/js/ test/unit/coffee/} do |ok, res|
      if ! ok
        raise "error compiling unit tests: #{res}"
      end
    end
  end

  desc "compiles acceptance tests"
  task :acceptancetests => ["compile:app"] do
    puts "Compiling Acceptance Tests to JS"
    sh %{coffee -c -o test/acceptance/js/ test/acceptance/coffee/} do |ok, res|
      if ! ok
        raise "error compiling acceptance tests: #{res}"
      end
    end
  end
end
namespace 'test' do
  desc "runs all tests"
  task :all => ["test:unit", "test:acceptance"] do
    puts "testing everything"
  end

  desc "Run Acceptance Tests"
  task :acceptance => ["compile:acceptancetests"] do
    puts "Running Acceptance Tests"
    # Optionally filter by feature name via `rake test:acceptance feature=...`
    feature = ENV['feature']
    if feature.nil?
      featureFlags = ""
    else
      featureFlags = "-g \"#{feature}\""
    end
    sh %{mocha -R spec #{featureFlags} test/acceptance/js/*} do |ok, res|
      if ! ok
        raise "error running acceptance tests: #{res}"
      end
    end
  end

  desc "run unit tests"
  task :unit => ["compile:unittests"] do
    puts "Running Unit Tests"
    # Optionally restrict to a subdirectory via `rake test:unit feature=...`
    featurePath = ENV['feature']
    puts featurePath
    # FIX: removed a dead empty `elsif` branch and an unreachable `else`.
    if featurePath.nil?
      featurePath = ''
    elsif !featurePath.include?('/')
      # Treat a bare name as a directory under test/unit/js/.
      featurePath += '/'
    end
    sh %{mocha -R spec test/unit/js/#{featurePath}* --ignore-leaks} do |ok, res|
      if ! ok
        raise "error running unit tests : #{res}"
      end
    end
  end
end
namespace 'deploy' do
  desc "safely deploys app"
  task :live do
    # Push first so capistrano deploys the latest commit.
    sh %{git push origin}
    sh %{cap live deploy}
  end
end

View file

@ -0,0 +1,214 @@
# Unit tests for ConcatManager.concatTwoUpdates: verifies which pairs of
# single-op updates get merged, split, cancelled, or left alone.
sinon = require('sinon')
chai = require('chai')
should = chai.should()
expect = chai.expect
modulePath = "../../../../app/js/ConcatManager.js"
SandboxedModule = require('sandboxed-module')

describe "ConcatManager", ->
	beforeEach ->
		@ConcatManager = SandboxedModule.require modulePath
		@user_id = "user-id-1"
		@other_user_id = "user-id-2"
		# Two timestamps one second apart — well within MAX_TIME_BETWEEN_UPDATES.
		@ts1 = Date.now()
		@ts2 = Date.now() + 1000

	describe "concatTwoUpdates", ->
		describe "insert - insert", ->
			it "should append one insert to the other", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 6, i: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "foobar" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should insert one insert inside the other", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 5, i: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "fobaro" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should not append separated inserts", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, i: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "foo" ]
					meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, i: "bar" ]
					meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
				}]

		describe "delete - delete", ->
			it "should append one delete to the other", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, d: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, d: "foobar" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should insert one delete inside the other", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 1, d: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 1, d: "bafoor" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should not append separated deletes", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, d: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, d: "foo" ]
					meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, d: "bar" ]
					meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
				}]

		describe "insert - delete", ->
			it "should undo a previous insert", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 5, d: "o" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "fo" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should remove part of an insert from the middle", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "fobaro" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 5, d: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "foo" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should cancel out two opposite updates", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal []

			it "should not combine separated updates", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, d: "bar" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, i: "foo" ]
					meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 9, d: "bar" ]
					meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
				}]

		describe "delete - insert", ->
			it "should redo a previous delete at the beginning", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, i: "f" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 4, d: "oo" ]
					meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id
				}]

			it "should redo a previous delete from halfway through", ->
				# The delete is split in two; each piece keeps the timestamp of
				# the update it came from.
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foobar" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, i: "oo" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, d: "f" ]
					meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 5, d: "bar" ]
					meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
				}]

			it "should not combine the ops if the insert text does not match the delete text", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foobar" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, i: "xy" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal [{
					op: [ p: 3, d: "foobar" ]
					meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, i: "xy" ]
					meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id
				}]

			it "should cancel two equal updates", ->
				expect(@ConcatManager.concatTwoUpdates({
					op: [ p: 3, d: "foo" ]
					meta: ts: @ts1, user_id: @user_id
				}, {
					op: [ p: 3, i: "foo" ]
					meta: ts: @ts2, user_id: @user_id
				}))
				.to.deep.equal []