overleaf/services/document-updater/app/js/RangesManager.js
Eric Mc Sween 7444026cc3 Merge pull request #21310 from overleaf/em-validate-tracked-changes
Reapply "Sanity check for tracked changes in document-updater"

GitOrigin-RevId: e7b38d192f5202006f61bd015bba81d751af5413
2024-10-30 09:04:46 +00:00

657 lines
19 KiB
JavaScript

// @ts-check
const RangesTracker = require('@overleaf/ranges-tracker')
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
const Metrics = require('./Metrics')
const _ = require('lodash')
const { isInsert, isDelete, isComment, getDocLength } = require('./Utils')
/**
* @import { Comment, CommentOp, InsertOp, DeleteOp, HistoryOp, Op } from './types'
* @import { HistoryCommentOp, HistoryDeleteOp, HistoryInsertOp, HistoryRetainOp } from './types'
* @import { HistoryDeleteTrackedChange, HistoryUpdate, Ranges, TrackedChange, Update } from './types'
*/
// Buckets passed to Metrics.histogram for the 'range-delta' metric, which
// records by how much the total range count dropped in applyUpdate
const RANGE_DELTA_BUCKETS = [0, 1, 2, 3, 4, 5, 10, 20, 50]
const RangesManager = {
  // Limits enforced by applyUpdate on the resulting ranges
  MAX_COMMENTS: 500,
  MAX_CHANGES: 2000,

  /**
   * Apply an update to the given doc (lines and ranges) and return new ranges
   *
   * The incoming ranges are deep-cloned before being handed to the
   * RangesTracker, so the caller's object is not modified.
   *
   * @param {string} projectId
   * @param {string} docId
   * @param {Ranges} ranges - ranges before the updates were applied
   * @param {Update[]} updates
   * @param {string[]} newDocLines - the document lines after the updates were applied
   * @param {object} opts
   * @param {boolean} [opts.historyRangesSupport] - whether history ranges support is enabled
   * @returns {{ newRanges: Ranges, rangesWereCollapsed: boolean, historyUpdates: HistoryUpdate[] }}
   * @throws {Error} when the resulting ranges exceed MAX_CHANGES or
   *   MAX_COMMENTS; errors raised by rangesTracker.validate() are logged and
   *   rethrown
   */
  applyUpdate(projectId, docId, ranges, updates, newDocLines, opts = {}) {
    if (ranges == null) {
      ranges = {}
    }
    if (updates == null) {
      updates = []
    }
    const { changes, comments } = _.cloneDeep(ranges)
    const rangesTracker = new RangesTracker(changes, comments)
    const [emptyRangeCountBefore, totalRangeCountBefore] =
      RangesManager._emptyRangesCount(rangesTracker)
    const historyUpdates = []
    for (const update of updates) {
      const trackingChanges = Boolean(update.meta?.tc)
      rangesTracker.track_changes = trackingChanges
      if (update.meta?.tc) {
        rangesTracker.setIdSeed(update.meta.tc)
      }
      const historyOps = []
      for (const op of update.op) {
        let croppedCommentOps = []
        if (opts.historyRangesSupport) {
          // The history op must be computed against the ranges as they are
          // before the op is applied
          historyOps.push(
            getHistoryOp(op, rangesTracker.comments, rangesTracker.changes)
          )
          if (isDelete(op) && trackingChanges) {
            // If a tracked delete overlaps a comment, the comment must be
            // cropped. The extent of the cropping is calculated before the
            // delete is applied, but the cropping operations are applied
            // later, after the delete is applied.
            croppedCommentOps = getCroppedCommentOps(op, rangesTracker.comments)
          }
        } else if (isInsert(op) || isDelete(op)) {
          historyOps.push(op)
        }
        rangesTracker.applyOp(op, { user_id: update.meta?.user_id })
        if (croppedCommentOps.length > 0) {
          historyOps.push(
            ...croppedCommentOps.map(croppedOp =>
              getHistoryOpForComment(croppedOp, rangesTracker.changes)
            )
          )
        }
      }
      if (historyOps.length > 0) {
        historyUpdates.push({ ...update, op: historyOps })
      }
    }

    sanityCheckTrackedChanges(
      projectId,
      docId,
      rangesTracker.changes,
      getDocLength(newDocLines)
    )

    if (
      rangesTracker.changes?.length > RangesManager.MAX_CHANGES ||
      rangesTracker.comments?.length > RangesManager.MAX_COMMENTS
    ) {
      throw new Error('too many comments or tracked changes')
    }

    try {
      // This is a consistency check that all of our ranges and
      // comments still match the corresponding text
      rangesTracker.validate(newDocLines.join('\n'))
    } catch (err) {
      logger.error(
        { err, projectId, docId, newDocLines, updates },
        'error validating ranges'
      )
      throw err
    }

    const [emptyRangeCountAfter, totalRangeCountAfter] =
      RangesManager._emptyRangesCount(rangesTracker)
    const rangesWereCollapsed =
      emptyRangeCountAfter > emptyRangeCountBefore ||
      totalRangeCountAfter + 1 < totalRangeCountBefore // also include the case where multiple ranges were removed
    // monitor the change in range count, we may want to snapshot before large decreases
    if (totalRangeCountAfter < totalRangeCountBefore) {
      Metrics.histogram(
        'range-delta',
        totalRangeCountBefore - totalRangeCountAfter,
        RANGE_DELTA_BUCKETS,
        { status_code: rangesWereCollapsed ? 'saved' : 'unsaved' }
      )
    }

    const newRanges = RangesManager._getRanges(rangesTracker)
    logger.debug(
      {
        projectId,
        docId,
        changesCount: newRanges.changes?.length,
        commentsCount: newRanges.comments?.length,
        rangesWereCollapsed,
      },
      'applied updates to ranges'
    )
    return { newRanges, rangesWereCollapsed, historyUpdates }
  },

  /**
   * Remove the tracked changes with the given ids from the ranges and return
   * the resulting ranges. The remaining tracked changes are sanity-checked
   * against the document length.
   *
   * Unlike applyUpdate, the ranges are passed to the RangesTracker without
   * being cloned first.
   *
   * @param projectId - only used for logging in the sanity check
   * @param docId - only used for logging in the sanity check
   * @param changeIds - ids of the tracked changes to remove
   * @param ranges - object holding the current `changes` and `comments`
   * @param lines - the document lines, used to compute the doc length
   * @returns the ranges left after removal, in the minimal form produced by
   *   _getRanges
   */
  acceptChanges(projectId, docId, changeIds, ranges, lines) {
    const { changes, comments } = ranges
    logger.debug(`accepting ${changeIds.length} changes in ranges`)
    const rangesTracker = new RangesTracker(changes, comments)
    rangesTracker.removeChangeIds(changeIds)
    sanityCheckTrackedChanges(
      projectId,
      docId,
      rangesTracker.changes,
      getDocLength(lines)
    )
    const newRanges = RangesManager._getRanges(rangesTracker)
    return newRanges
  },

  /**
   * Remove the comment with the given id from the ranges and return the
   * resulting ranges.
   *
   * @param commentId - id of the comment to remove
   * @param ranges - object holding the current `changes` and `comments`
   * @returns the ranges left after removal, in the minimal form produced by
   *   _getRanges
   */
  deleteComment(commentId, ranges) {
    const { changes, comments } = ranges
    logger.debug({ commentId }, 'deleting comment in ranges')
    const rangesTracker = new RangesTracker(changes, comments)
    rangesTracker.removeCommentId(commentId)
    const newRanges = RangesManager._getRanges(rangesTracker)
    return newRanges
  },

  /**
   * Build the history updates describing the acceptance of tracked changes.
   *
   * One update is produced per accepted change, in order of position (deletes
   * before inserts at the same position). An accepted tracked delete becomes a
   * delete op; an accepted tracked insert becomes a retain op with tracking
   * type 'none'. Ops placed after tracked deletes that are not being accepted
   * carry an `hpos` shifted by the length of those deletes.
   *
   * @param {object} args
   * @param {string} args.docId
   * @param {string[]} args.acceptedChangeIds
   * @param {TrackedChange[]} args.changes
   * @param {string} args.pathname
   * @param {string} args.projectHistoryId
   * @param {string[]} args.lines
   */
  getHistoryUpdatesForAcceptedChanges({
    docId,
    acceptedChangeIds,
    changes,
    pathname,
    projectHistoryId,
    lines,
  }) {
    // Membership is tested for every change, so index the accepted ids once
    // instead of scanning the array each time
    const acceptedIds = new Set(acceptedChangeIds)
    /** @type {(change: TrackedChange) => boolean} */
    const isAccepted = change => acceptedIds.has(change.id)

    const historyOps = []

    // Keep ops in order of offset, with deletes before inserts.
    // Sort a copy so that the caller's array is left untouched.
    const sortedChanges = changes.slice().sort(function (c1, c2) {
      const result = c1.op.p - c2.op.p
      if (result !== 0) {
        return result
      } else if (isInsert(c1.op) && isDelete(c2.op)) {
        return 1
      } else if (isDelete(c1.op) && isInsert(c2.op)) {
        return -1
      } else {
        return 0
      }
    })

    // The history doc length counts the text of every tracked delete on top
    // of the editor doc length
    const docLength = getDocLength(lines)
    let historyDocLength = docLength
    for (const change of sortedChanges) {
      if (isDelete(change.op)) {
        historyDocLength += change.op.d.length
      }
    }

    // Total length of the tracked deletes seen so far that are not accepted
    let unacceptedDeletes = 0
    for (const change of sortedChanges) {
      /** @type {HistoryOp | undefined} */
      let op

      if (isDelete(change.op)) {
        if (isAccepted(change)) {
          op = {
            p: change.op.p,
            d: change.op.d,
          }
          if (unacceptedDeletes > 0) {
            op.hpos = op.p + unacceptedDeletes
          }
        } else {
          unacceptedDeletes += change.op.d.length
        }
      } else if (isInsert(change.op)) {
        if (isAccepted(change)) {
          op = {
            p: change.op.p,
            r: change.op.i,
            tracking: { type: 'none' },
          }
          if (unacceptedDeletes > 0) {
            op.hpos = op.p + unacceptedDeletes
          }
        }
      }

      if (!op) {
        continue
      }

      /** @type {HistoryUpdate} */
      const historyOp = {
        doc: docId,
        op: [op],
        meta: {
          ...change.metadata,
          ts: Date.now(),
          doc_length: docLength,
          pathname,
        },
      }

      if (projectHistoryId) {
        historyOp.projectHistoryId = projectHistoryId
      }

      if (historyOp.meta && historyDocLength !== docLength) {
        historyOp.meta.history_doc_length = historyDocLength
      }

      historyOps.push(historyOp)

      // Accepting a tracked delete shrinks the history doc for the ops that
      // follow
      if (isDelete(change.op) && isAccepted(change)) {
        historyDocLength -= change.op.d.length
      }
    }

    return historyOps
  },

  /**
   * Extract the ranges held by a RangesTracker.
   *
   * @param rangesTracker - object exposing `changes` and `comments` lists
   * @returns an object containing `changes` and/or `comments` only when the
   *   corresponding list is non-empty
   */
  _getRanges(rangesTracker) {
    // Return the minimal data structure needed, since most documents won't have any
    // changes or comments
    const response = {}
    if (rangesTracker.changes != null && rangesTracker.changes.length > 0) {
      response.changes = rangesTracker.changes
    }
    if (rangesTracker.comments != null && rangesTracker.comments.length > 0) {
      response.comments = rangesTracker.comments
    }
    return response
  },

  /**
   * Count the ranges and how many of them are empty.
   *
   * A comment is empty when its text (`op.c`) is the empty string; a tracked
   * change is empty when it is an insert whose text (`op.i`) is the empty
   * string. Tracked deletes are never counted as empty.
   *
   * @param ranges - object exposing optional `changes` and `comments` lists
   * @returns a pair `[emptyCount, totalCount]`
   */
  _emptyRangesCount(ranges) {
    let emptyCount = 0
    let totalCount = 0
    for (const comment of ranges.comments || []) {
      totalCount++
      if (comment.op.c === '') {
        emptyCount++
      }
    }
    for (const change of ranges.changes || []) {
      totalCount++
      if (change.op.i === '') {
        emptyCount++
      }
    }
    return [emptyCount, totalCount]
  },
}
/**
 * Translate an editor op into the op that is sent to the history system,
 * dispatching on the kind of op (insert, delete or comment).
 *
 * @param {Op} op - the editor op
 * @param {Comment[]} comments - the comments in the document before the op
 *   is applied; only consulted for inserts
 * @param {TrackedChange[]} changes - the list of tracked changes in the
 *   document before the op is applied. That list, coming from
 *   RangesTracker is ordered by position.
 * @param {object} [opts] - currently unused
 * @returns {HistoryOp}
 * @throws {OError} when the op is neither an insert, a delete nor a comment
 */
function getHistoryOp(op, comments, changes, opts = {}) {
  if (isInsert(op)) {
    return getHistoryOpForInsert(op, comments, changes)
  }
  if (isDelete(op)) {
    return getHistoryOpForDelete(op, changes)
  }
  if (isComment(op)) {
    return getHistoryOpForComment(op, changes)
  }
  throw new OError('Unrecognized op', { op })
}
/**
 * Build the history op for an insert
 *
 * The history position of an insert is shifted forward by the length of every
 * tracked delete located strictly before it. A tracked delete located at the
 * same position does not shift the insert: the insert goes in front of it.
 *
 * A commentIds property lists the comments the insert falls into. An insert
 * only belongs to a comment when it lands strictly inside it; inserts at
 * either edge of a comment are left out of that comment.
 *
 * @param {InsertOp} op
 * @param {Comment[]} comments
 * @param {TrackedChange[]} changes
 * @returns {HistoryInsertOp}
 */
function getHistoryOpForInsert(op, comments, changes) {
  const insertPos = op.p

  // Collect the ids of the comments that strictly contain the insert position
  const enclosingCommentIds = new Set()
  for (const { op: commentOp } of comments) {
    const commentEnd = commentOp.p + commentOp.c.length
    if (commentOp.p < insertPos && insertPos < commentEnd) {
      enclosingCommentIds.add(commentOp.t)
    }
  }

  let historyPos = insertPos
  let rejectsTrackedDelete = false
  for (const { op: trackedOp } of changes) {
    if (!isDelete(trackedOp)) {
      // Tracked inserts have no effect on the history position
      continue
    }

    if (trackedOp.p < insertPos) {
      // Tracked delete in front of the insert: shift the insert forward
      historyPos += trackedOp.d.length
      continue
    }

    if (trackedOp.p !== insertPos) {
      // Tracked deletes are ordered by position, so this one and all the
      // following ones sit after the insert. Nothing more to do.
      break
    }

    // Tracked delete at the insert position: the insert stays in front of it.
    // When the op is an undo whose text matches the beginning of the tracked
    // delete, RangesManager treats it as a rejection of that tracked delete.
    // Flag it so that project-history can take the appropriate action.
    if (op.u && trackedOp.d.startsWith(op.i)) {
      rejectsTrackedDelete = true
    }
  }

  /** @type {HistoryInsertOp} */
  const historyOp = { ...op }
  if (enclosingCommentIds.size > 0) {
    historyOp.commentIds = Array.from(enclosingCommentIds)
  }
  if (historyPos !== insertPos) {
    historyOp.hpos = historyPos
  }
  if (rejectsTrackedDelete) {
    historyOp.trackedDeleteRejection = true
  }
  return historyOp
}
/**
 * Build the history op for a delete
 *
 * The history position of a delete is shifted forward by the length of every
 * tracked delete located before or at its position. Tracked changes that
 * overlap the deleted text are listed in the trackedChanges property, with
 * offsets relative to the start of the delete, so that the history system
 * can delete around tracked deletes without deleting them.
 *
 * @param {DeleteOp} op
 * @param {TrackedChange[]} changes
 * @param {object} [opts] - currently unused
 * @returns {HistoryDeleteOp}
 */
function getHistoryOpForDelete(op, changes, opts = {}) {
  const deleteStart = op.p
  const deleteEnd = deleteStart + op.d.length
  let historyPos = deleteStart

  /** @type HistoryDeleteTrackedChange[] */
  const overlappingChanges = []

  for (const { op: trackedOp } of changes) {
    if (trackedOp.p <= deleteStart) {
      // Tracked change starting before or at the position of the delete
      if (isDelete(trackedOp)) {
        // Tracked deletes push the delete forward in the history
        historyPos += trackedOp.d.length
      } else if (isInsert(trackedOp)) {
        // A tracked insert may extend into the deleted text; record the part
        // that does, clipped to the end of the delete
        const insertEnd = trackedOp.p + trackedOp.i.length
        const overlapEnd = Math.min(insertEnd, deleteEnd)
        if (overlapEnd > deleteStart) {
          overlappingChanges.push({
            type: 'insert',
            offset: 0,
            length: overlapEnd - deleteStart,
          })
        }
      }
      continue
    }

    if (!(trackedOp.p < deleteEnd)) {
      // Tracked changes are ordered by position: everything from here on is
      // past the deleted text
      break
    }

    // Tracked change starting inside the deleted text
    const offset = trackedOp.p - deleteStart
    if (isDelete(trackedOp)) {
      overlappingChanges.push({
        type: 'delete',
        offset,
        length: trackedOp.d.length,
      })
    } else if (isInsert(trackedOp)) {
      overlappingChanges.push({
        type: 'insert',
        offset,
        // Clip the tracked insert to the end of the delete
        length: Math.min(trackedOp.i.length, deleteEnd - trackedOp.p),
      })
    }
  }

  /** @type {HistoryDeleteOp} */
  const historyOp = { ...op }
  if (historyPos !== deleteStart) {
    historyOp.hpos = historyPos
  }
  if (overlappingChanges.length > 0) {
    historyOp.trackedChanges = overlappingChanges
  }
  return historyOp
}
/**
 * Build the history op for a comment
 *
 * The history position of a comment is shifted forward by the length of every
 * tracked delete located before or at its position. Tracked deletes located
 * inside the comment make the comment longer in the history: its length is
 * extended by the length of those tracked deletes.
 *
 * @param {CommentOp} op
 * @param {TrackedChange[]} changes
 * @returns {HistoryCommentOp}
 */
function getHistoryOpForComment(op, changes) {
  const commentStart = op.p
  const commentLength = op.c.length
  const commentEnd = commentStart + commentLength

  let historyPos = commentStart
  let historyLength = commentLength

  for (const { op: trackedOp } of changes) {
    if (!isDelete(trackedOp)) {
      // Tracked inserts don't affect comments in the history
      continue
    }

    if (trackedOp.p <= commentStart) {
      // Tracked delete before or at the start of the comment: shift the
      // comment forward
      historyPos += trackedOp.d.length
      continue
    }

    if (!(trackedOp.p < commentEnd)) {
      // Tracked deletes are ordered by position: this one and the following
      // ones are past the comment
      break
    }

    // Tracked delete inside the comment: the comment grows to cover it
    historyLength += trackedOp.d.length
  }

  /** @type {HistoryCommentOp} */
  const historyOp = { ...op }
  if (historyPos !== commentStart) {
    historyOp.hpos = historyPos
  }
  if (historyLength !== commentLength) {
    historyOp.hlen = historyLength
  }
  return historyOp
}
/**
 * Compute the comment ops that crop comments overlapped by a tracked delete
 *
 * In the editor, a tracked delete behaves like a regular delete as far as
 * comments are concerned: comment ranges shrink accordingly. The history, on
 * the other hand, keeps the text of the tracked delete and would keep the
 * comment over it. To keep both views aligned, we produce comment ops that
 * crop the affected comments in the history.
 *
 * Only comments whose start or end falls within the deleted text are cropped;
 * a comment that strictly contains the delete on both sides yields no op.
 *
 * @param {DeleteOp} op
 * @param {Comment[]} comments
 * @returns {CommentOp[]}
 */
function getCroppedCommentOps(op, comments) {
  const deleteStart = op.p
  const deleteEnd = deleteStart + op.d.length

  /** @type {HistoryCommentOp[]} */
  const croppedOps = []

  /**
   * Record a cropped version of the given comment
   *
   * @param {Comment} comment - the comment being cropped
   * @param {number} p - position of the cropped comment
   * @param {string} c - text left in the cropped comment
   */
  const addCroppedOp = (comment, p, c) => {
    /** @type {CommentOp} */
    const croppedOp = { p, c, t: comment.op.t }
    if (comment.op.resolved) {
      croppedOp.resolved = true
    }
    croppedOps.push(croppedOp)
  }

  for (const comment of comments) {
    const { p: commentStart, c: commentText } = comment.op
    const commentEnd = commentStart + commentText.length

    const deleteCoversStart =
      deleteStart <= commentStart && deleteEnd > commentStart
    const deleteCoversEndOnly =
      deleteStart > commentStart &&
      deleteStart < commentEnd &&
      deleteEnd >= commentEnd

    if (deleteCoversStart) {
      // The delete overlaps the start of the comment or all of it. What is
      // left of the comment ends up at the position of the delete.
      const overlapLength = Math.min(deleteEnd, commentEnd) - commentStart
      addCroppedOp(comment, deleteStart, commentText.slice(overlapLength))
    } else if (deleteCoversEndOnly) {
      // The delete overlaps the end of the comment. The comment keeps its
      // position and loses its tail.
      const overlapLength = commentEnd - deleteStart
      addCroppedOp(comment, commentStart, commentText.slice(0, -overlapLength))
    }
  }

  return croppedOps
}
/**
 * Check some tracked changes assumptions:
 *
 * - Tracked changes can't be empty
 * - Tracked inserts can't overlap with another tracked change
 * - There can't be two tracked deletes at the same position
 * - Ranges should be ordered by position, deletes before inserts
 * - Tracked changes must lie within the document: a tracked insert must end
 *   at or before the end of the document, and a tracked delete must be
 *   positioned at or before the end of the document
 *
 * If any assumption isn't upheld, log a warning containing the position and
 * length of every tracked change and the index of the first offending one.
 * This function never throws on malformed changes and returns nothing.
 *
 * @param {string} projectId
 * @param {string} docId
 * @param {TrackedChange[]} changes
 * @param {number} docLength
 */
function sanityCheckTrackedChanges(projectId, docId, changes, docLength) {
  let lastDeletePos = -1 // allow a tracked delete at position 0
  let lastInsertEnd = 0
  let ok = true
  let badChangeIndex
  for (let i = 0; i < changes.length; i++) {
    const change = changes[i]
    const op = change.op
    if ('i' in op) {
      if (
        op.i.length === 0 ||
        op.p < lastDeletePos ||
        op.p < lastInsertEnd ||
        op.p < 0 ||
        op.p + op.i.length > docLength
      ) {
        ok = false
        badChangeIndex = i
        break
      }
      lastInsertEnd = op.p + op.i.length
    } else if ('d' in op) {
      if (
        op.d.length === 0 ||
        op.p <= lastDeletePos ||
        op.p < lastInsertEnd ||
        op.p < 0 ||
        op.p > docLength
      ) {
        ok = false
        badChangeIndex = i
        break
      }
      // A tracked delete at the very end of the document is valid, so keep
      // checking the remaining changes: anything that follows it can only be
      // misplaced and must be reported.
      lastDeletePos = op.p
    }
  }

  if (ok) {
    return
  }

  // Log a summary of the changes (lengths instead of contents)
  const changeRanges = []
  for (const change of changes) {
    if ('i' in change.op) {
      changeRanges.push({
        id: change.id,
        p: change.op.p,
        i: change.op.i.length,
      })
    } else if ('d' in change.op) {
      changeRanges.push({
        id: change.id,
        p: change.op.p,
        d: change.op.d.length,
      })
    }
  }

  logger.warn(
    { projectId, docId, changes: changeRanges, badChangeIndex },
    'Malformed tracked changes detected'
  )
}
module.exports = RangesManager