mirror of
https://github.com/overleaf/overleaf.git
synced 2024-12-26 19:54:25 +00:00
ee85d948e2
GitOrigin-RevId: ef2ef77e26df59d1af3df6dc664e284d3c70102d
302 lines
8.7 KiB
JavaScript
302 lines
8.7 KiB
JavaScript
import OError from '@overleaf/o-error'
|
|
import DMP from 'diff-match-patch'
|
|
|
|
// Largest gap between the meta.ts values of two updates (the inline note
// gives it as one minute) for which they may still be merged.
const MAX_TIME_BETWEEN_UPDATES = 1000 * 60 // one minute

// Upper bound on the combined text length of two ops of the same kind
// (two inserts or two deletes) for them to be merged into one.
const MAX_UPDATE_SIZE = 1024 * 1024 * 2 // 2 MB

// Markers compared against the first element of each diff tuple returned by
// diff-match-patch.
const ADDED = 1
const REMOVED = -1
const UNCHANGED = 0
|
|
|
|
// Returns s1 with s2 spliced in at index pos.
const strInject = (s1, pos, s2) => {
  const head = s1.slice(0, pos)
  const tail = s1.slice(pos)
  return head + s2 + tail
}
|
|
// Returns s1 with `length` characters removed starting at index pos.
const strRemove = (s1, pos, length) => {
  const kept = s1.slice(0, pos)
  const rest = s1.slice(pos + length)
  return kept + rest
}
|
|
|
|
// Shared diff-match-patch instance used by diffAsShareJsOps. The short
// Diff_Timeout prevents the diff algorithm from searching too hard for
// changes in unrelated content.
const dmp = Object.assign(new DMP(), { Diff_Timeout: 0.1 })
|
|
|
|
// Returns a copy of `update` carrying `op` as its op property.
//
// For performance this is only a shallow clone: the top-level object and its
// meta property (also an object) are copied one level deep, and the op
// property is then overwritten directly. The update passed in is not modified.
const cloneWithOp = (update, op) => {
  const clone = Object.assign({}, update)
  return Object.assign(clone, {
    meta: Object.assign({}, clone.meta),
    op,
  })
}
|
|
// Builds the update that results from merging two updates into the single
// given op. Everything (including meta, and so doc_length and ts) is taken
// from firstUpdate, except the version v, which comes from secondUpdate when
// it has one.
const mergeUpdatesWithOp = (firstUpdate, secondUpdate, op) => {
  const merged = cloneWithOp(firstUpdate, op)
  if (secondUpdate.v == null) {
    return merged
  }
  merged.v = secondUpdate.v
  return merged
}
|
|
|
|
// Returns the document length after applying `op` to a document of the given
// length: an insert (op.i) grows it by the inserted text's length, a delete
// (op.d) shrinks it by the deleted text's length. An insert takes precedence
// when both are present. Throws an OError for any other kind of op.
const adjustLengthByOp = (length, op) => {
  const { i: inserted, d: deleted } = op
  if (inserted != null) {
    return length + inserted.length
  }
  if (deleted != null) {
    return length - deleted.length
  }
  throw new OError('unexpected op type')
}
|
|
|
|
// Updates arrive from the doc updater in the format
//   {
//     op: [ { ... op1 ... }, { ... op2 ... } ]
//     meta: { ts: ..., user_id: ... }
//   }
// but it's easier to work with one op per update, so convert these updates
// to our compressed format
//   [{
//     op: op1
//     meta: { ts: ..., user_id: ... }
//   }, {
//     op: op2
//     meta: { ts: ..., user_id: ... }
//   }]
//
// Updates without an op are passed through untouched. Ops that are neither
// inserts nor deletes are dropped. When the incoming update has a
// meta.doc_length, each split update gets the doc length as it was just
// before its own op was applied.
export function convertToSingleOpUpdates(updates) {
  const singleOpUpdates = []
  for (const update of updates) {
    if (update.op != null) {
      // Reject any non-insert or delete ops, i.e. comments
      const textOps = update.op.filter(({ i, d }) => i != null || d != null)
      let runningLength = update.meta.doc_length
      textOps.forEach(op => {
        const single = cloneWithOp(update, op)
        if (runningLength != null) {
          single.meta.doc_length = runningLength
          runningLength = adjustLengthByOp(runningLength, op)
        }
        singleOpUpdates.push(single)
      })
    } else {
      // Not a text op, likely a project structure op
      singleOpUpdates.push(update)
    }
  }
  return singleOpUpdates
}
|
|
|
|
// Drops updates whose op is an empty insert or an empty delete.
//
// Diffing an insert and delete can return blank inserts and deletes
// which the OL history service doesn't have an equivalent for.
//
// NOTE: this relies on the updates only containing either op.i or op.d entries
// but not both, which is the case because diffAsShareJsOps does this
export function filterBlankUpdates(updates) {
  const isBlank = ({ op }) => Boolean(op) && (op.i === '' || op.d === '')
  return updates.filter(update => !isBlank(update))
}
|
|
|
|
// Converts single-op updates back into updates whose op is an array, joining
// consecutive text updates that share the same v, doc and pathname into one
// update holding all of their ops in order. Updates without an op are passed
// through unchanged and are never joined.
export function concatUpdatesWithSameVersion(updates) {
  const result = []
  for (const update of updates) {
    if (update.op == null) {
      result.push(update)
      continue
    }

    // Work on a clone so the incoming update keeps its single op
    const wrapped = cloneWithOp(update, [update.op])
    const previous = result[result.length - 1]
    const belongsToPrevious =
      previous != null &&
      previous.op != null &&
      previous.v === wrapped.v &&
      previous.doc === wrapped.doc &&
      previous.pathname === wrapped.pathname

    if (belongsToPrevious) {
      previous.op = [...previous.op, ...wrapped.op]
    } else {
      result.push(wrapped)
    }
  }
  return result
}
|
|
|
|
// Full compression pipeline for raw updates: split them into single-op
// updates, merge adjacent updates where possible, drop blank ops, then join
// the remaining ops that share a version back into multi-op updates.
export function compressRawUpdates(rawUpdates) {
  const singleOpUpdates = convertToSingleOpUpdates(rawUpdates)
  const compressed = compressUpdates(singleOpUpdates)
  const nonBlank = filterBlankUpdates(compressed)
  return concatUpdatesWithSameVersion(nonBlank)
}
|
|
|
|
// Compresses a list of single-op updates by repeatedly trying to merge each
// update into the most recent compressed one (see _concatTwoUpdates).
//
// @param {Array} updates - updates in order; this array is NOT modified
// @returns {Array} a new array with the compressed updates
export function compressUpdates(updates) {
  if (updates.length === 0) {
    return []
  }

  // Seed the result with the first update, reading it by index rather than
  // shift()ing it off, so the caller's array is left intact.
  const compressedUpdates = [updates[0]]
  for (let index = 1; index < updates.length; index++) {
    const update = updates[index]
    // The list can be empty here: an insert followed by a delete of the same
    // text merges into nothing at all.
    const lastCompressedUpdate = compressedUpdates.pop()
    if (lastCompressedUpdate != null) {
      const newCompressedUpdates = _concatTwoUpdates(
        lastCompressedUpdate,
        update
      )
      // Append in place instead of re-copying the whole list each iteration
      for (const compressedUpdate of newCompressedUpdates) {
        compressedUpdates.push(compressedUpdate)
      }
    } else {
      compressedUpdates.push(update)
    }
  }

  return compressedUpdates
}
|
|
|
|
// Tries to merge two consecutive single-op updates. Returns the list of
// updates that should replace the pair:
//  - [firstUpdate, secondUpdate] (the same objects) when they cannot be merged
//  - a single merged update for overlapping inserts/deletes
//  - [] when a delete removes exactly what the preceding insert added
//  - the ops of a diff when a delete is followed by an insert at the same place
function _concatTwoUpdates(firstUpdate, secondUpdate) {
  // Previously we cloned firstUpdate and secondUpdate at this point but we
  // can skip this step because whenever they are returned with
  // modification there is always a clone at that point via
  // mergeUpdatesWithOp.
  const unmerged = [firstUpdate, secondUpdate]

  // Project structure ops
  if (firstUpdate.op == null || secondUpdate.op == null) {
    return unmerged
  }

  // Updates to different docs/files never merge
  const targetsDiffer =
    firstUpdate.doc !== secondUpdate.doc ||
    firstUpdate.pathname !== secondUpdate.pathname
  if (targetsDiffer) {
    return unmerged
  }

  const firstMeta = firstUpdate.meta
  const secondMeta = secondUpdate.meta

  if (firstMeta.user_id !== secondMeta.user_id) {
    return unmerged
  }

  // An external update only merges with another external update from the
  // same source; external and non-external updates never merge.
  const firstIsExternal = firstMeta.type === 'external'
  const secondIsExternal = secondMeta.type === 'external'
  if (
    firstIsExternal !== secondIsExternal ||
    (firstIsExternal && firstMeta.source !== secondMeta.source)
  ) {
    return unmerged
  }

  if (secondMeta.ts - firstMeta.ts > MAX_TIME_BETWEEN_UPDATES) {
    return unmerged
  }

  const firstOp = firstUpdate.op
  const secondOp = secondUpdate.op
  const textLength = op => (op.i && op.i.length) || (op.d && op.d.length)
  const firstSize = textLength(firstOp)
  const secondSize = textLength(secondOp)
  const firstOpInsideSecondOp =
    secondOp.p <= firstOp.p && firstOp.p <= secondOp.p + secondSize
  const secondOpInsideFirstOp =
    firstOp.p <= secondOp.p && secondOp.p <= firstOp.p + firstSize
  const combinedLengthUnderLimit = firstSize + secondSize < MAX_UPDATE_SIZE

  // Two inserts
  const bothInserts = firstOp.i != null && secondOp.i != null
  if (bothInserts && secondOpInsideFirstOp && combinedLengthUnderLimit) {
    const mergedInsert = {
      p: firstOp.p,
      i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i),
    }
    return [mergeUpdatesWithOp(firstUpdate, secondUpdate, mergedInsert)]
  }

  // Two deletes
  const bothDeletes = firstOp.d != null && secondOp.d != null
  if (bothDeletes && firstOpInsideSecondOp && combinedLengthUnderLimit) {
    const mergedDelete = {
      p: secondOp.p,
      d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d),
    }
    return [mergeUpdatesWithOp(firstUpdate, secondUpdate, mergedDelete)]
  }

  // An insert and then a delete
  if (firstOp.i != null && secondOp.d != null && secondOpInsideFirstOp) {
    const offset = secondOp.p - firstOp.p
    const overlappingText = firstOp.i.slice(offset, offset + secondOp.d.length)
    // Only trim the insert when the delete is fully contained within it
    if (overlappingText !== secondOp.d) {
      // This will only happen if the delete extends outside the insert
      return unmerged
    }
    const remainingInsert = strRemove(firstOp.i, offset, secondOp.d.length)
    if (remainingInsert === '') {
      // The delete removed everything that was inserted
      return []
    }
    return [
      mergeUpdatesWithOp(firstUpdate, secondUpdate, {
        p: firstOp.p,
        i: remainingInsert,
      }),
    ]
  }

  // A delete then an insert at the same place, likely a copy-paste of a chunk of content
  if (firstOp.d != null && secondOp.i != null && firstOp.p === secondOp.p) {
    const offset = firstOp.p
    // Doing a diff like this loses track of the doc lengths for each
    // update, so recalculate them as we go
    let docLength = firstUpdate.meta.doc_length
    const diffUpdates = []
    for (const op of diffAsShareJsOps(firstOp.d, secondOp.i)) {
      op.p += offset
      const diffUpdate = mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
      diffUpdate.meta.doc_length = docLength
      docLength = adjustLengthByOp(docLength, op)
      diffUpdates.push(diffUpdate)
    }
    return diffUpdates
  }

  return unmerged
}
|
|
|
|
// Diffs `before` against `after` and expresses the result as a list of
// insert ({ i, p }) and delete ({ d, p }) ops. Positions are tracked so that
// each op's p accounts for the text kept or inserted ahead of it; a delete
// does not advance the position. Throws if the diff contains an entry of an
// unknown type.
export function diffAsShareJsOps(before, after) {
  const diffs = dmp.diff_main(before, after)
  dmp.diff_cleanupSemantic(diffs)

  const ops = []
  let position = 0
  for (const diff of diffs) {
    // Each diff entry is read by index as a [type, content] pair
    const kind = diff[0]
    const text = diff[1]
    switch (kind) {
      case ADDED:
        ops.push({
          i: text,
          p: position,
        })
        position += text.length
        break
      case REMOVED:
        ops.push({
          d: text,
          p: position,
        })
        break
      case UNCHANGED:
        position += text.length
        break
      default:
        throw new Error('Unknown type')
    }
  }
  return ops
}
|