mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-30 01:55:34 -05:00
70 lines
2.2 KiB
CoffeeScript
70 lines
2.2 KiB
CoffeeScript
mongojs = require "mongojs"
|
|
async = require "async"
|
|
# Database handle used by the whole script; "docHistory" is the collection
# later accessed as db.docHistory.
db = mongojs.connect "localhost/sharelatex", ["docHistory"]
# BSON helper taken from the db handle; used for calculateObjectSize when
# sizing ops.
{BSON} = db.bson
|
|
util = require 'util'
|
|
_ = require 'underscore'
|
|
# Tuning knobs for packing.
# MAX_SIZE:  upper bound on the summed BSON size of the ops in one pack;
#            a single op at or above this size is never packed.
MAX_SIZE = 1024 * 1024
# MAX_COUNT: a pack stops accepting ops once it holds this many.
MAX_COUNT = 1024
# MIN_COUNT: packs are only written when they hold more than this many ops.
MIN_COUNT = 100
# KEEP_OPS:  this many of the latest ops (by v) are left as individual docs.
KEEP_OPS = 100
|
|
|
|
# Write one pack document to docHistory and remove the individual op
# documents it replaces, queued together on a single ordered bulk operation
# (the insert first, then one removeOne per packed op).
#
# packObj  - pack document to insert; packObj.pack is the array of packed
#            ops, each carrying the _id of the op document to remove.
# callback - called as callback(err, result) with whatever bulk.execute
#            reported. A mismatch between expected and actual counts is
#            logged but not turned into an error.
insertPack = (packObj, callback) ->
  bulk = db.docHistory.initializeOrderedBulkOp()
  expect_nInserted = 1
  expect_nRemoved = packObj.pack.length
  console.log 'inserting', expect_nInserted, 'pack, will remove', expect_nRemoved, 'ops'
  bulk.insert packObj
  packObj.pack.forEach (op) ->
    bulk.find({_id: op._id}).removeOne()
  bulk.execute (err, result) ->
    # result may be missing when the bulk operation failed; read the counters
    # null-safely so the error path logs and still reaches the callback
    # instead of throwing a TypeError.
    nInserted = result?.nInserted
    nRemoved = result?.nRemoved
    if err? or nInserted != expect_nInserted or nRemoved != expect_nRemoved
      console.log err, result
      console.log 'nInserted', nInserted, 'nRemoved', nRemoved
    callback(err, result)
|
|
|
|
# Pack the unpacked history ops of one document.
#
# Loads every docHistory entry for doc_id that has no `pack` field, sorted by
# version `v`, leaves the last KEEP_OPS entries alone, groups the rest into
# packs bounded by MAX_COUNT ops and MAX_SIZE bytes (BSON size), and writes
# each pack holding more than MIN_COUNT ops via insertPack.
#
# doc_id   - document id, converted with mongojs.ObjectId.
# callback - called as callback(err, result) when all packs are written, or
#            as callback(err) when the initial query fails.
packDocHistory = (doc_id, callback) ->
  console.log 'packing doc_id', doc_id
  query = {doc_id: mongojs.ObjectId(doc_id), pack: {$exists: false}}
  db.docHistory.find(query).sort {v: 1}, (err, docs) ->
    # A failed query leaves docs undefined; bail out here rather than
    # throwing on docs.length and never calling back.
    if err?
      console.log 'err', err
      return callback(err)

    packs = []
    top = null
    origDocs = docs.length
    # keep the last KEEP_OPS as individual ops
    docs = docs.slice(0, -KEEP_OPS)
    docs.forEach (d) ->
      sz = BSON.calculateObjectSize(d)
      entry = {v: d.v, meta: d.meta, op: d.op, _id: d._id}
      if top? && top.pack.length < MAX_COUNT && top.sz + sz < MAX_SIZE
        # extend the current pack; top.pack is owned by the pack, so an
        # in-place push replaces the copy-on-every-op concat
        top.pack.push entry
        top.sz += sz
        top.v_end = d.v
        top.meta.end_ts = d.meta.end_ts
      else if sz < MAX_SIZE
        # create a new pack, seeded from a shallow copy of the first op
        top = _.clone(d)
        top.pack = [entry]
        # fresh meta object so updating end_ts never touches d.meta
        top.meta = {start_ts: d.meta.start_ts, end_ts: d.meta.end_ts}
        top.sz = sz
        delete top.op
        delete top._id
        packs.push top
      else
        # keep the op
        # NOTE(review): `top` is left open here, so later ops are appended to
        # the pack that started before this oversized op — confirm that a
        # pack spanning an unpacked op is intended.
        console.log 'keeping large op unchanged'
      return

    # only store packs with a sufficient number of ops, discard others
    packs = packs.filter (packObj) ->
      packObj.pack.length > MIN_COUNT

    console.log 'docs', origDocs, 'packs', packs.length
    async.each packs, insertPack, (err, result) ->
      if err?
        console.log 'err', err
      console.log 'done writing packs'
      callback err, result
|
|
|
|
# Entry point: run packDocHistory for every doc_id given on the command line,
# then close the database connection.
async.each process.argv.slice(2), (doc_id, callback) ->
  packDocHistory(doc_id, callback)
, (err, results) ->
  # Report a failure instead of dropping it, and make the exit status
  # reflect it; the connection is closed either way.
  if err?
    console.log 'packing failed', err
    process.exitCode = 1
  console.log 'closing db'
  db.close()
|