overleaf/services/track-changes/pack.coffee

71 lines
2.2 KiB
CoffeeScript
Raw Normal View History

2015-02-13 11:23:36 -05:00
mongojs = require "mongojs"
async = require "async"
db = mongojs.connect("localhost/sharelatex", ["docHistory"])
BSON=db.bson.BSON
util = require 'util'
_ = require 'underscore'
# Upper bound on the summed BSON size of the history entries in one pack
# (1 MiB); an entry at or above this size is never packed.
MAX_SIZE = 1024*1024
# Upper bound on the number of ops held in one pack.
MAX_COUNT = 1024
2015-02-13 12:01:37 -05:00
# A pack is only stored if it holds more than this many ops.
MIN_COUNT = 100
# Number of most recent ops (highest versions) that packDocHistory leaves unpacked.
KEEP_OPS = 100
2015-02-13 11:23:36 -05:00
2015-02-13 12:01:37 -05:00
# Store one pack in docHistory and delete the individual ops it replaces.
#
# Everything is queued on a single ordered bulk operation: the pack document
# is inserted first, then each op listed in packObj.pack is removed by _id.
# A mismatch between the expected and reported counts is logged but is not
# turned into an error.
#
# packObj  - pack document to insert; packObj.pack is the array of ops
#            (each carrying an _id) that the pack supersedes.
# callback - called as callback(err, result) with the outcome of bulk.execute.
insertPack = (packObj, callback) ->
  bulk = db.docHistory.initializeOrderedBulkOp()
  expect_nInserted = 1
  expect_nRemoved = packObj.pack.length
  console.log 'inserting', expect_nInserted, 'pack, will remove', expect_nRemoved, 'ops'
  bulk.insert packObj
  packObj.pack.forEach (op) ->
    bulk.find({_id: op._id}).removeOne()
  bulk.execute (err, result) ->
    # result may be missing when execute fails; read the counters defensively
    # so the logging below cannot throw and the callback is always reached
    nInserted = result?.nInserted
    nRemoved = result?.nRemoved
    if err? or nInserted != expect_nInserted or nRemoved != expect_nRemoved
      console.log err, result
      console.log 'nInserted', nInserted, 'nRemoved', nRemoved
    callback(err, result)
# Pack the history of a single document.
#
# Loads the document's docHistory entries that have no `pack` field, in
# ascending version order, leaves the last KEEP_OPS of them untouched, groups
# the rest into packs bounded by MAX_COUNT ops and MAX_SIZE bytes of BSON, and
# writes every pack holding more than MIN_COUNT ops through insertPack.
#
# doc_id   - document id; converted with mongojs.ObjectId for the query.
# callback - called as callback(err, result) once all packs have been written,
#            or as callback(err) if loading the history fails.
packDocHistory = (doc_id, callback) ->
  console.log 'packing doc_id', doc_id
  query = {doc_id: mongojs.ObjectId(doc_id), pack: {$exists: false}}
  db.docHistory.find(query).sort {v: 1}, (err, docs) ->
    # a failed query must be reported, not crash on docs.length below
    if err?
      console.log 'err', err
      return callback(err)
    packs = []
    top = null
    origDocs = docs.length
    # keep the last KEEP_OPS as individual ops
    docs = docs.slice(0, -KEEP_OPS)
    docs.forEach (d) ->
      sz = BSON.calculateObjectSize(d)
      if top? && top.pack.length < MAX_COUNT && top.sz + sz < MAX_SIZE
        # the op fits: append it and extend the pack's version and time range
        top.pack.push {v: d.v, meta: d.meta, op: d.op, _id: d._id}
        top.sz += sz
        top.v_end = d.v
        top.meta.end_ts = d.meta.end_ts
      else if sz < MAX_SIZE
        # create a new pack, seeded from a shallow copy of this entry
        top = _.clone(d)
        top.pack = [ {v: d.v, meta: d.meta, op: d.op, _id: d._id} ]
        top.meta = { start_ts: d.meta.start_ts, end_ts: d.meta.end_ts }
        # sz stays on the object, so it is stored with the pack on insert
        top.sz = sz
        delete top.op
        delete top._id
        packs.push top
      else
        # keep the op
        # NOTE(review): `top` is left as-is here, so later ops can still be
        # appended to the pack that precedes this large op - confirm intended
        console.log 'keeping large op unchanged'
    # only store packs with a sufficient number of ops, discard others
    packs = packs.filter (packObj) ->
      packObj.pack.length > MIN_COUNT
    console.log 'docs', origDocs, 'packs', packs.length
    async.each packs, insertPack, (err, result) ->
      if err?
        console.log 'err', err
      console.log 'done writing packs'
      callback err, result
2015-02-13 12:01:37 -05:00
# Script entry point: every command-line argument is treated as a doc id and
# handed to packDocHistory; once all of them have finished (or one has
# reported an error) the database connection is closed. The error itself is
# not inspected here.
docIds = process.argv.slice(2)
closeDb = (err, results) ->
  console.log 'closing db'
  db.close()
async.each docIds, packDocHistory, closeDb