mirror of
https://github.com/overleaf/overleaf.git
synced 2025-01-08 17:32:56 +00:00
ee85d948e2
GitOrigin-RevId: ef2ef77e26df59d1af3df6dc664e284d3c70102d
171 lines
5.6 KiB
JavaScript
171 lines
5.6 KiB
JavaScript
/** @module */
|
|
'use strict'
|
|
|
|
const _ = require('lodash')
|
|
const BPromise = require('bluebird')
|
|
|
|
const core = require('overleaf-editor-core')
|
|
const Chunk = core.Chunk
|
|
const History = core.History
|
|
|
|
const assert = require('./assert')
|
|
const chunkStore = require('./chunk_store')
|
|
|
|
function countChangeBytes(change) {
|
|
// Note: This is not quite accurate, because the raw change may contain raw
|
|
// file info (or conceivably even content) that will not be included in the
|
|
// actual stored object.
|
|
return Buffer.byteLength(JSON.stringify(change.toRaw()))
|
|
}
|
|
|
|
function totalChangeBytes(changes) {
|
|
return changes.length ? _(changes).map(countChangeBytes).sum() : 0
|
|
}
|
|
|
|
// provide a simple timer function
|
|
function Timer() {
|
|
this.t0 = process.hrtime()
|
|
}
|
|
Timer.prototype.elapsed = function () {
|
|
const dt = process.hrtime(this.t0)
|
|
const timeInMilliseconds = (dt[0] + dt[1] * 1e-9) * 1e3
|
|
return timeInMilliseconds
|
|
}
|
|
|
|
/**
|
|
* Break the given set of changes into zero or more Chunks according to the
|
|
* provided limits and store them.
|
|
*
|
|
* Some other possible improvements:
|
|
* 1. This does a lot more JSON serialization than it has to. We may know the
|
|
* JSON for the changes before we call this function, so we could in that
|
|
* case get the byte size of each change without doing any work. Even if we
|
|
* don't know it initially, we could save some computation by caching this
|
|
* info rather than recomputing it many times. TBD whether it is worthwhile.
|
|
* 2. We don't necessarily have to fetch the latest chunk in order to determine
|
|
* that it is full. We could store this in the chunk metadata record. It may
|
|
* be worth distinguishing between a Chunk and its metadata record. The
|
|
* endVersion may be better suited to the metadata record.
|
|
*
|
|
* @param {string} projectId
|
|
* @param {Array.<Change>} allChanges
|
|
* @param {Object} limits
|
|
* @param {number} clientEndVersion
|
|
* @return {Promise.<Object?>}
|
|
*/
|
|
module.exports = function persistChanges(
|
|
projectId,
|
|
allChanges,
|
|
limits,
|
|
clientEndVersion
|
|
) {
|
|
assert.projectId(projectId)
|
|
assert.array(allChanges)
|
|
assert.maybe.object(limits)
|
|
assert.integer(clientEndVersion)
|
|
|
|
let currentChunk
|
|
// currentSnapshot tracks the latest change that we're applying; we use it to
|
|
// check that the changes we are persisting are valid.
|
|
let currentSnapshot
|
|
let originalEndVersion
|
|
let changesToPersist
|
|
|
|
limits = limits || {}
|
|
_.defaults(limits, {
|
|
changeBucketMinutes: 60,
|
|
maxChanges: 2500,
|
|
maxChangeBytes: 5 * 1024 * 1024,
|
|
maxChunkChanges: 2000,
|
|
maxChunkChangeBytes: 5 * 1024 * 1024,
|
|
maxChunkChangeTime: 5000, // warn if total time for changes in a chunk takes longer than this
|
|
})
|
|
|
|
function checkElapsedTime(timer) {
|
|
const timeTaken = timer.elapsed()
|
|
if (timeTaken > limits.maxChunkChangeTime) {
|
|
console.log('warning: slow chunk', projectId, timeTaken)
|
|
}
|
|
}
|
|
|
|
function fillChunk(chunk, changes) {
|
|
let totalBytes = totalChangeBytes(chunk.getChanges())
|
|
let changesPushed = false
|
|
while (changes.length > 0) {
|
|
if (chunk.getChanges().length >= limits.maxChunkChanges) break
|
|
const changeBytes = countChangeBytes(changes[0])
|
|
if (totalBytes + changeBytes > limits.maxChunkChangeBytes) break
|
|
const changesToFill = changes.splice(0, 1)
|
|
currentSnapshot.applyAll(changesToFill, { strict: true })
|
|
chunk.pushChanges(changesToFill)
|
|
totalBytes += changeBytes
|
|
changesPushed = true
|
|
}
|
|
return changesPushed
|
|
}
|
|
|
|
function extendLastChunkIfPossible() {
|
|
return chunkStore.loadLatest(projectId).then(function (latestChunk) {
|
|
currentChunk = latestChunk
|
|
originalEndVersion = latestChunk.getEndVersion()
|
|
if (originalEndVersion !== clientEndVersion) {
|
|
throw new Chunk.ConflictingEndVersion(
|
|
clientEndVersion,
|
|
originalEndVersion
|
|
)
|
|
}
|
|
|
|
currentSnapshot = latestChunk.getSnapshot().clone()
|
|
const timer = new Timer()
|
|
currentSnapshot.applyAll(latestChunk.getChanges())
|
|
|
|
if (!fillChunk(currentChunk, changesToPersist)) return
|
|
checkElapsedTime(timer)
|
|
|
|
return chunkStore.update(projectId, originalEndVersion, currentChunk)
|
|
})
|
|
}
|
|
|
|
function createNewChunksAsNeeded() {
|
|
if (changesToPersist.length === 0) return
|
|
|
|
const endVersion = currentChunk.getEndVersion()
|
|
const history = new History(currentSnapshot.clone(), [])
|
|
const chunk = new Chunk(history, endVersion)
|
|
const timer = new Timer()
|
|
if (fillChunk(chunk, changesToPersist)) {
|
|
checkElapsedTime(timer)
|
|
currentChunk = chunk
|
|
return chunkStore.create(projectId, chunk).then(createNewChunksAsNeeded)
|
|
}
|
|
throw new Error('failed to fill empty chunk')
|
|
}
|
|
|
|
function isOlderThanMinChangeTimestamp(change) {
|
|
return change.getTimestamp().getTime() < limits.minChangeTimestamp
|
|
}
|
|
|
|
function isOlderThanMaxChangeTimestamp(change) {
|
|
return change.getTimestamp().getTime() < limits.maxChangeTimestamp
|
|
}
|
|
|
|
const oldChanges = _.filter(allChanges, isOlderThanMinChangeTimestamp)
|
|
const anyTooOld = _.some(oldChanges, isOlderThanMaxChangeTimestamp)
|
|
const tooManyChanges = oldChanges.length > limits.maxChanges
|
|
const tooManyBytes = totalChangeBytes(oldChanges) > limits.maxChangeBytes
|
|
|
|
if (anyTooOld || tooManyChanges || tooManyBytes) {
|
|
changesToPersist = oldChanges
|
|
const numberOfChangesToPersist = oldChanges.length
|
|
return extendLastChunkIfPossible()
|
|
.then(createNewChunksAsNeeded)
|
|
.then(function () {
|
|
return {
|
|
numberOfChangesPersisted: numberOfChangesToPersist,
|
|
originalEndVersion,
|
|
currentChunk,
|
|
}
|
|
})
|
|
}
|
|
return BPromise.resolve(null)
|
|
}
|