mirror of
https://github.com/overleaf/overleaf.git
synced 2025-01-12 14:42:29 +00:00
e4cc2a0816
* Fix history diffs when deleting over many tracked deletes As we are looping through tracked deletes, the offset between the result positions and the source positions must be kept constant. Otherwise, the tracked deletes are translated as we delete text and move the source cursor. GitOrigin-RevId: b2417a75219aaa16bf5c61e0ebcb0586cae6aef2
621 lines
18 KiB
JavaScript
621 lines
18 KiB
JavaScript
import _ from 'lodash'
|
|
import logger from '@overleaf/logger'
|
|
import OError from '@overleaf/o-error'
|
|
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
|
import * as WebApiManager from './WebApiManager.js'
|
|
import * as Errors from './Errors.js'
|
|
import {
|
|
TextOperation,
|
|
InsertOp,
|
|
RemoveOp,
|
|
RetainOp,
|
|
Range,
|
|
TrackedChangeList,
|
|
} from 'overleaf-editor-core'
|
|
|
|
/**
|
|
* @typedef {import('overleaf-editor-core/lib/types').RawEditOperation} RawEditOperation
|
|
* @typedef {import('overleaf-editor-core/lib/types').TrackedChangeRawData} TrackedChangeRawData
|
|
*/
|
|
|
|
export function convertToSummarizedUpdates(chunk, callback) {
|
|
const version = chunk.chunk.startVersion
|
|
const { files } = chunk.chunk.history.snapshot
|
|
const builder = new UpdateSetBuilder(version, files)
|
|
|
|
for (const change of chunk.chunk.history.changes) {
|
|
try {
|
|
builder.applyChange(change)
|
|
} catch (error1) {
|
|
const error = error1
|
|
return callback(error)
|
|
}
|
|
}
|
|
callback(null, builder.summarizedUpdates)
|
|
}
|
|
|
|
export function convertToDiffUpdates(
|
|
projectId,
|
|
chunk,
|
|
pathname,
|
|
fromVersion,
|
|
toVersion,
|
|
callback
|
|
) {
|
|
let error
|
|
let version = chunk.chunk.startVersion
|
|
const { files } = chunk.chunk.history.snapshot
|
|
const builder = new UpdateSetBuilder(version, files)
|
|
|
|
let file = null
|
|
for (const change of chunk.chunk.history.changes) {
|
|
// Because we're referencing by pathname, which can change, we
|
|
// want to get the first file in the range fromVersion:toVersion
|
|
// that has the pathname we want. Note that this might not exist yet
|
|
// at fromVersion, so we'll just settle for the first one we find
|
|
// after that.
|
|
if (fromVersion <= version && version <= toVersion) {
|
|
if (file == null) {
|
|
file = builder.getFile(pathname)
|
|
}
|
|
}
|
|
|
|
try {
|
|
builder.applyChange(change)
|
|
} catch (error1) {
|
|
error = error1
|
|
return callback(error)
|
|
}
|
|
version += 1
|
|
}
|
|
// Versions act as fence posts, with updates taking us from one to another,
|
|
// so we also need to check after the final update, when we're at the last version.
|
|
if (fromVersion <= version && version <= toVersion) {
|
|
if (file == null) {
|
|
file = builder.getFile(pathname)
|
|
}
|
|
}
|
|
|
|
// return an empty diff if the file was flagged as missing with an explicit null
|
|
if (builder.getFile(pathname) === null) {
|
|
return callback(null, { initialContent: '', updates: [] })
|
|
}
|
|
|
|
if (file == null) {
|
|
error = new Errors.NotFoundError(
|
|
`pathname '${pathname}' not found in range`
|
|
)
|
|
return callback(error)
|
|
}
|
|
|
|
WebApiManager.getHistoryId(projectId, (err, historyId) => {
|
|
if (err) {
|
|
return callback(err)
|
|
}
|
|
file.getDiffUpdates(historyId, fromVersion, toVersion, callback)
|
|
})
|
|
}
|
|
|
|
class UpdateSetBuilder {
|
|
constructor(startVersion, files) {
|
|
this.version = startVersion
|
|
this.summarizedUpdates = []
|
|
|
|
this.files = Object.create(null)
|
|
for (const pathname in files) {
|
|
// initialize file from snapshot
|
|
const data = files[pathname]
|
|
this.files[pathname] = new File(pathname, data, startVersion)
|
|
}
|
|
}
|
|
|
|
getFile(pathname) {
|
|
return this.files[pathname]
|
|
}
|
|
|
|
applyChange(change) {
|
|
const timestamp = new Date(change.timestamp)
|
|
let authors = _.map(change.authors, id => {
|
|
if (id == null) {
|
|
return null
|
|
}
|
|
return id
|
|
})
|
|
authors = authors.concat(change.v2Authors || [])
|
|
this.currentUpdate = {
|
|
meta: {
|
|
users: authors,
|
|
start_ts: timestamp.getTime(),
|
|
end_ts: timestamp.getTime(),
|
|
},
|
|
v: this.version,
|
|
pathnames: new Set([]),
|
|
project_ops: [],
|
|
}
|
|
if (change.origin) {
|
|
this.currentUpdate.meta.origin = change.origin
|
|
}
|
|
|
|
for (const op of change.operations) {
|
|
this.applyOperation(op, timestamp, authors)
|
|
}
|
|
|
|
this.currentUpdate.pathnames = Array.from(this.currentUpdate.pathnames)
|
|
this.summarizedUpdates.push(this.currentUpdate)
|
|
|
|
this.version += 1
|
|
}
|
|
|
|
applyOperation(op, timestamp, authors) {
|
|
if (UpdateSetBuilder._isTextOperation(op)) {
|
|
this.applyTextOperation(op, timestamp, authors)
|
|
} else if (UpdateSetBuilder._isRenameOperation(op)) {
|
|
this.applyRenameOperation(op, timestamp, authors)
|
|
} else if (UpdateSetBuilder._isRemoveFileOperation(op)) {
|
|
this.applyRemoveFileOperation(op, timestamp, authors)
|
|
} else if (UpdateSetBuilder._isAddFileOperation(op)) {
|
|
this.applyAddFileOperation(op, timestamp, authors)
|
|
}
|
|
}
|
|
|
|
applyTextOperation(operation, timestamp, authors) {
|
|
const { pathname } = operation
|
|
if (pathname === '') {
|
|
// this shouldn't happen, but we continue to allow the user to see the history
|
|
logger.warn(
|
|
{ operation, timestamp, authors },
|
|
'pathname is empty for text operation'
|
|
)
|
|
return
|
|
}
|
|
|
|
const file = this.files[pathname]
|
|
if (file == null) {
|
|
// this shouldn't happen, but we continue to allow the user to see the history
|
|
logger.warn(
|
|
{ operation, timestamp, authors },
|
|
'file is missing for text operation'
|
|
)
|
|
this.files[pathname] = null // marker for a missing file
|
|
return
|
|
}
|
|
|
|
file.applyTextOperation(authors, timestamp, this.version, operation)
|
|
this.currentUpdate.pathnames.add(pathname)
|
|
}
|
|
|
|
applyRenameOperation(operation, timestamp, authors) {
|
|
const { pathname, newPathname } = operation
|
|
const file = this.files[pathname]
|
|
if (file == null) {
|
|
// this shouldn't happen, but we continue to allow the user to see the history
|
|
logger.warn(
|
|
{ operation, timestamp, authors },
|
|
'file is missing for rename operation'
|
|
)
|
|
this.files[pathname] = null // marker for a missing file
|
|
return
|
|
}
|
|
|
|
file.rename(newPathname)
|
|
delete this.files[pathname]
|
|
this.files[newPathname] = file
|
|
|
|
this.currentUpdate.project_ops.push({
|
|
rename: { pathname, newPathname },
|
|
})
|
|
}
|
|
|
|
applyAddFileOperation(operation, timestamp, authors) {
|
|
const { pathname } = operation
|
|
// add file
|
|
this.files[pathname] = new File(pathname, operation.file, this.version)
|
|
|
|
this.currentUpdate.project_ops.push({ add: { pathname } })
|
|
}
|
|
|
|
applyRemoveFileOperation(operation, timestamp, authors) {
|
|
const { pathname } = operation
|
|
const file = this.files[pathname]
|
|
if (file == null) {
|
|
// this shouldn't happen, but we continue to allow the user to see the history
|
|
logger.warn(
|
|
{ operation, timestamp, authors },
|
|
'pathname not found when removing file'
|
|
)
|
|
this.files[pathname] = null // marker for a missing file
|
|
return
|
|
}
|
|
|
|
delete this.files[pathname]
|
|
|
|
this.currentUpdate.project_ops.push({ remove: { pathname } })
|
|
}
|
|
|
|
static _isTextOperation(op) {
|
|
return Object.prototype.hasOwnProperty.call(op, 'textOperation')
|
|
}
|
|
|
|
static _isRenameOperation(op) {
|
|
return (
|
|
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
|
|
op.newPathname !== ''
|
|
)
|
|
}
|
|
|
|
static _isRemoveFileOperation(op) {
|
|
return (
|
|
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
|
|
op.newPathname === ''
|
|
)
|
|
}
|
|
|
|
static _isAddFileOperation(op) {
|
|
return Object.prototype.hasOwnProperty.call(op, 'file')
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param {string} content
|
|
* @param {TrackedChangeList} trackedChanges
|
|
* @returns {string}
|
|
*/
|
|
function removeTrackedDeletesFromString(content, trackedChanges) {
|
|
let result = ''
|
|
let cursor = 0
|
|
const trackedDeletes = trackedChanges
|
|
.asSorted()
|
|
.filter(tc => tc.tracking.type === 'delete')
|
|
for (const trackedChange of trackedDeletes) {
|
|
if (cursor < trackedChange.range.start) {
|
|
result += content.slice(cursor, trackedChange.range.start)
|
|
}
|
|
// skip the tracked change itself
|
|
cursor = trackedChange.range.end
|
|
}
|
|
result += content.slice(cursor)
|
|
return result
|
|
}
|
|
|
|
class File {
|
|
constructor(pathname, snapshot, initialVersion) {
|
|
this.pathname = pathname
|
|
this.snapshot = snapshot
|
|
this.initialVersion = initialVersion
|
|
this.operations = []
|
|
}
|
|
|
|
applyTextOperation(authors, timestamp, version, operation) {
|
|
this.operations.push({ authors, timestamp, version, operation })
|
|
}
|
|
|
|
rename(pathname) {
|
|
this.pathname = pathname
|
|
}
|
|
|
|
getDiffUpdates(historyId, fromVersion, toVersion, callback) {
|
|
if (this.snapshot.stringLength == null) {
|
|
// Binary file
|
|
return callback(null, { binary: true })
|
|
}
|
|
this._loadContentAndRanges(historyId, (error, content, ranges) => {
|
|
if (error != null) {
|
|
return callback(OError.tag(error))
|
|
}
|
|
const trackedChanges = TrackedChangeList.fromRaw(
|
|
ranges?.trackedChanges || []
|
|
)
|
|
/** @type {string | undefined} */
|
|
let initialContent
|
|
const updates = []
|
|
|
|
for (let operation of this.operations) {
|
|
if (!('textOperation' in operation.operation)) {
|
|
// We only care about text operations
|
|
continue
|
|
}
|
|
let authors, ops, timestamp, version
|
|
;({ authors, timestamp, version, operation } = operation)
|
|
// Set the initialContent to the latest version we have before the diff
|
|
// begins. 'version' here refers to the document version as we are
|
|
// applying the updates. So we store the content *before* applying the
|
|
// updates.
|
|
if (version >= fromVersion && initialContent === undefined) {
|
|
initialContent = removeTrackedDeletesFromString(
|
|
content,
|
|
trackedChanges
|
|
)
|
|
}
|
|
|
|
;({ content, ops } = this._convertTextOperation(
|
|
content,
|
|
operation,
|
|
trackedChanges
|
|
))
|
|
|
|
// We only need to return the updates between fromVersion and toVersion
|
|
if (fromVersion <= version && version < toVersion) {
|
|
updates.push({
|
|
meta: {
|
|
users: authors,
|
|
start_ts: timestamp.getTime(),
|
|
end_ts: timestamp.getTime(),
|
|
},
|
|
v: version,
|
|
op: ops,
|
|
})
|
|
}
|
|
}
|
|
|
|
if (initialContent === undefined) {
|
|
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
|
|
}
|
|
callback(null, { initialContent, updates })
|
|
})
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param {string} initialContent
|
|
* @param {RawEditOperation} operation
|
|
* @param {TrackedChangeList} trackedChanges
|
|
*/
|
|
_convertTextOperation(initialContent, operation, trackedChanges) {
|
|
const textOp = TextOperation.fromJSON(operation)
|
|
const textUpdateBuilder = new TextUpdateBuilder(
|
|
initialContent,
|
|
trackedChanges
|
|
)
|
|
for (const op of textOp.ops) {
|
|
textUpdateBuilder.applyOp(op)
|
|
}
|
|
textUpdateBuilder.finish()
|
|
return {
|
|
content: textUpdateBuilder.result,
|
|
ops: textUpdateBuilder.changes,
|
|
}
|
|
}
|
|
|
|
_loadContentAndRanges(historyId, callback) {
|
|
HistoryStoreManager.getProjectBlob(
|
|
historyId,
|
|
this.snapshot.hash,
|
|
(err, content) => {
|
|
if (err) {
|
|
return callback(err)
|
|
}
|
|
if (this.snapshot.rangesHash) {
|
|
HistoryStoreManager.getProjectBlob(
|
|
historyId,
|
|
this.snapshot.rangesHash,
|
|
(err, ranges) => {
|
|
if (err) {
|
|
return callback(err)
|
|
}
|
|
return callback(null, content, JSON.parse(ranges))
|
|
}
|
|
)
|
|
} else {
|
|
return callback(null, content, undefined)
|
|
}
|
|
}
|
|
)
|
|
}
|
|
}
|
|
|
|
class TextUpdateBuilder {
|
|
/**
|
|
*
|
|
* @param {string} source
|
|
* @param {TrackedChangeList} ranges
|
|
*/
|
|
constructor(source, ranges) {
|
|
this.trackedChanges = ranges
|
|
this.source = source
|
|
this.sourceCursor = 0
|
|
this.result = ''
|
|
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
|
|
this.changes = []
|
|
}
|
|
|
|
applyOp(op) {
|
|
if (op instanceof RetainOp) {
|
|
const length = this.result.length
|
|
this.applyRetain(op)
|
|
this.trackedChanges.applyRetain(length, op.length, {
|
|
tracking: op.tracking,
|
|
})
|
|
}
|
|
|
|
if (op instanceof InsertOp) {
|
|
const length = this.result.length
|
|
this.applyInsert(op)
|
|
this.trackedChanges.applyInsert(length, op.insertion, {
|
|
tracking: op.tracking,
|
|
})
|
|
}
|
|
|
|
if (op instanceof RemoveOp) {
|
|
const length = this.result.length
|
|
this.applyDelete(op)
|
|
this.trackedChanges.applyDelete(length, op.length)
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param {RetainOp} retain
|
|
*/
|
|
applyRetain(retain) {
|
|
const resultRetentionRange = new Range(this.result.length, retain.length)
|
|
const sourceRetentionRange = new Range(this.sourceCursor, retain.length)
|
|
|
|
let scanCursor = this.result.length
|
|
if (retain.tracking) {
|
|
// We are modifying existing tracked deletes. We need to treat removal
|
|
// (type insert/none) of a tracked delete as an insertion. Similarly, any
|
|
// range we introduce as a tracked deletion must be reported as a deletion.
|
|
const trackedDeletes = this.trackedChanges
|
|
.asSorted()
|
|
.filter(
|
|
tc =>
|
|
tc.tracking.type === 'delete' &&
|
|
tc.range.overlaps(resultRetentionRange)
|
|
)
|
|
|
|
const sourceOffset = this.sourceCursor - this.result.length
|
|
for (const trackedDelete of trackedDeletes) {
|
|
const resultTrackedDelete = trackedDelete.range
|
|
const sourceTrackedDelete = trackedDelete.range.moveBy(sourceOffset)
|
|
|
|
if (scanCursor < resultTrackedDelete.start) {
|
|
if (retain.tracking.type === 'delete') {
|
|
this.changes.push({
|
|
d: this.source.slice(
|
|
this.sourceCursor,
|
|
sourceTrackedDelete.start
|
|
),
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
this.result += this.source.slice(
|
|
this.sourceCursor,
|
|
sourceTrackedDelete.start
|
|
)
|
|
scanCursor = resultTrackedDelete.start
|
|
this.sourceCursor = sourceTrackedDelete.start
|
|
}
|
|
const endOfInsertionResult = Math.min(
|
|
resultTrackedDelete.end,
|
|
resultRetentionRange.end
|
|
)
|
|
const endOfInsertionSource = Math.min(
|
|
sourceTrackedDelete.end,
|
|
sourceRetentionRange.end
|
|
)
|
|
const text = this.source.slice(this.sourceCursor, endOfInsertionSource)
|
|
if (
|
|
retain.tracking.type === 'none' ||
|
|
retain.tracking.type === 'insert'
|
|
) {
|
|
this.changes.push({
|
|
i: text,
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
this.result += text
|
|
// skip the tracked delete itself
|
|
scanCursor = endOfInsertionResult
|
|
this.sourceCursor = endOfInsertionSource
|
|
|
|
if (scanCursor >= resultRetentionRange.end) {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if (scanCursor < resultRetentionRange.end) {
|
|
// The last region is not a tracked delete. But we should still handle
|
|
// a new tracked delete as a deletion.
|
|
const text = this.source.slice(
|
|
this.sourceCursor,
|
|
sourceRetentionRange.end
|
|
)
|
|
if (retain.tracking?.type === 'delete') {
|
|
this.changes.push({
|
|
d: text,
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
this.result += text
|
|
}
|
|
this.sourceCursor = sourceRetentionRange.end
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param {InsertOp} insert
|
|
*/
|
|
applyInsert(insert) {
|
|
if (insert.tracking?.type !== 'delete') {
|
|
// Skip tracked deletions
|
|
this.changes.push({
|
|
i: insert.insertion,
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
this.result += insert.insertion
|
|
// The source cursor doesn't advance
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param {RemoveOp} deletion
|
|
*/
|
|
applyDelete(deletion) {
|
|
const sourceDeletionRange = new Range(this.sourceCursor, deletion.length)
|
|
const resultDeletionRange = new Range(this.result.length, deletion.length)
|
|
|
|
const trackedDeletes = this.trackedChanges
|
|
.asSorted()
|
|
.filter(
|
|
tc =>
|
|
tc.tracking.type === 'delete' &&
|
|
tc.range.overlaps(resultDeletionRange)
|
|
)
|
|
.sort((a, b) => a.range.start - b.range.start)
|
|
|
|
let scanCursor = this.result.length
|
|
const sourceOffset = this.sourceCursor - this.result.length
|
|
|
|
for (const trackedDelete of trackedDeletes) {
|
|
const resultTrackDeleteRange = trackedDelete.range
|
|
const sourceTrackDeleteRange = trackedDelete.range.moveBy(sourceOffset)
|
|
|
|
if (scanCursor < resultTrackDeleteRange.start) {
|
|
this.changes.push({
|
|
d: this.source.slice(this.sourceCursor, sourceTrackDeleteRange.start),
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
// skip the tracked delete itself
|
|
scanCursor = Math.min(resultTrackDeleteRange.end, resultDeletionRange.end)
|
|
this.sourceCursor = Math.min(
|
|
sourceTrackDeleteRange.end,
|
|
sourceDeletionRange.end
|
|
)
|
|
|
|
if (scanCursor >= resultDeletionRange.end) {
|
|
break
|
|
}
|
|
}
|
|
if (scanCursor < resultDeletionRange.end) {
|
|
this.changes.push({
|
|
d: this.source.slice(this.sourceCursor, sourceDeletionRange.end),
|
|
p: this.result.length,
|
|
})
|
|
}
|
|
this.sourceCursor = sourceDeletionRange.end
|
|
}
|
|
|
|
finish() {
|
|
if (this.sourceCursor < this.source.length) {
|
|
this.result += this.source.slice(this.sourceCursor)
|
|
}
|
|
for (const op of this.changes) {
|
|
if ('p' in op && typeof op.p === 'number') {
|
|
// Maybe we have to move the position of the deletion to account for
|
|
// tracked changes that we're hiding in the UI.
|
|
op.p -= this.trackedChanges
|
|
.asSorted()
|
|
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
|
|
.map(tc => {
|
|
if (tc.range.end < op.p) {
|
|
return tc.range.length
|
|
}
|
|
return op.p - tc.range.start
|
|
})
|
|
.reduce((a, b) => a + b, 0)
|
|
}
|
|
}
|
|
}
|
|
}
|