overleaf/services/project-history/app/js/ChunkTranslator.js
Mathias Jakobsen 4ef7bc617b Merge pull request #17196 from overleaf/mj-tracked-deletes-filtering
[project-history] Filter tracked deletes from diff views

GitOrigin-RevId: 32c49a740932ef28534b82d390fe00e6500864ca
2024-03-13 09:03:19 +00:00

578 lines
16 KiB
JavaScript

import _ from 'lodash'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import {
TextOperation,
InsertOp,
RemoveOp,
RetainOp,
Range,
TrackedChangeList,
} from 'overleaf-editor-core'
/**
* @typedef {import('overleaf-editor-core/lib/types').RawEditOperation} RawEditOperation
* @typedef {import('overleaf-editor-core/lib/types').TrackedChangeRawData} TrackedChangeRawData
*/
export function convertToSummarizedUpdates(chunk, callback) {
const version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
for (const change of chunk.chunk.history.changes) {
try {
builder.applyChange(change)
} catch (error1) {
const error = error1
return callback(error)
}
}
callback(null, builder.summarizedUpdates)
}
export function convertToDiffUpdates(
projectId,
chunk,
pathname,
fromVersion,
toVersion,
callback
) {
let error
let version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
let file = null
for (const change of chunk.chunk.history.changes) {
// Because we're referencing by pathname, which can change, we
// want to get the first file in the range fromVersion:toVersion
// that has the pathname we want. Note that this might not exist yet
// at fromVersion, so we'll just settle for the first one we find
// after that.
if (fromVersion <= version && version <= toVersion) {
if (file == null) {
file = builder.getFile(pathname)
}
}
try {
builder.applyChange(change)
} catch (error1) {
error = error1
return callback(error)
}
version += 1
}
// Versions act as fence posts, with updates taking us from one to another,
// so we also need to check after the final update, when we're at the last version.
if (fromVersion <= version && version <= toVersion) {
if (file == null) {
file = builder.getFile(pathname)
}
}
// return an empty diff if the file was flagged as missing with an explicit null
if (builder.getFile(pathname) === null) {
return callback(null, { initialContent: '', updates: [] })
}
if (file == null) {
error = new Errors.NotFoundError(
`pathname '${pathname}' not found in range`
)
return callback(error)
}
WebApiManager.getHistoryId(projectId, (err, historyId) => {
if (err) {
return callback(err)
}
file.getDiffUpdates(historyId, fromVersion, toVersion, callback)
})
}
class UpdateSetBuilder {
constructor(startVersion, files) {
this.version = startVersion
this.summarizedUpdates = []
this.files = Object.create(null)
for (const pathname in files) {
// initialize file from snapshot
const data = files[pathname]
this.files[pathname] = new File(pathname, data, startVersion)
}
}
getFile(pathname) {
return this.files[pathname]
}
applyChange(change) {
const timestamp = new Date(change.timestamp)
let authors = _.map(change.authors, id => {
if (id == null) {
return null
}
return id
})
authors = authors.concat(change.v2Authors || [])
this.currentUpdate = {
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: this.version,
pathnames: new Set([]),
project_ops: [],
}
if (change.origin) {
this.currentUpdate.meta.origin = change.origin
}
for (const op of change.operations) {
this.applyOperation(op, timestamp, authors)
}
this.currentUpdate.pathnames = Array.from(this.currentUpdate.pathnames)
this.summarizedUpdates.push(this.currentUpdate)
this.version += 1
}
applyOperation(op, timestamp, authors) {
if (UpdateSetBuilder._isTextOperation(op)) {
this.applyTextOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isRenameOperation(op)) {
this.applyRenameOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isRemoveFileOperation(op)) {
this.applyRemoveFileOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isAddFileOperation(op)) {
this.applyAddFileOperation(op, timestamp, authors)
}
}
applyTextOperation(operation, timestamp, authors) {
const { pathname } = operation
if (pathname === '') {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname is empty for text operation'
)
return
}
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for text operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.applyTextOperation(authors, timestamp, this.version, operation)
this.currentUpdate.pathnames.add(pathname)
}
applyRenameOperation(operation, timestamp, authors) {
const { pathname, newPathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for rename operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.rename(newPathname)
delete this.files[pathname]
this.files[newPathname] = file
this.currentUpdate.project_ops.push({
rename: { pathname, newPathname },
})
}
applyAddFileOperation(operation, timestamp, authors) {
const { pathname } = operation
// add file
this.files[pathname] = new File(pathname, operation.file, this.version)
this.currentUpdate.project_ops.push({ add: { pathname } })
}
applyRemoveFileOperation(operation, timestamp, authors) {
const { pathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname not found when removing file'
)
this.files[pathname] = null // marker for a missing file
return
}
delete this.files[pathname]
this.currentUpdate.project_ops.push({ remove: { pathname } })
}
static _isTextOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'textOperation')
}
static _isRenameOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname !== ''
)
}
static _isRemoveFileOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname === ''
)
}
static _isAddFileOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'file')
}
}
/**
* @param {string} content
* @param {TrackedChangeList} trackedChanges
* @returns {string}
*/
function removeTrackedDeletesFromString(content, trackedChanges) {
let result = ''
let cursor = 0
const trackedDeletes = trackedChanges.trackedChanges.filter(
tc => tc.tracking.type === 'delete'
)
for (const trackedChange of trackedDeletes) {
if (cursor < trackedChange.range.start) {
result += content.slice(cursor, trackedChange.range.start)
}
// skip the tracked change itself
cursor = trackedChange.range.end
}
result += content.slice(cursor)
return result
}
class File {
constructor(pathname, snapshot, initialVersion) {
this.pathname = pathname
this.snapshot = snapshot
this.initialVersion = initialVersion
this.operations = []
}
applyTextOperation(authors, timestamp, version, operation) {
this.operations.push({ authors, timestamp, version, operation })
}
rename(pathname) {
this.pathname = pathname
}
getDiffUpdates(historyId, fromVersion, toVersion, callback) {
if (this.snapshot.stringLength == null) {
// Binary file
return callback(null, { binary: true })
}
this._loadContentAndRanges(historyId, (error, content, ranges) => {
if (error != null) {
return callback(OError.tag(error))
}
const trackedChanges = TrackedChangeList.fromRaw(
ranges?.trackedChanges || []
)
/** @type {string | undefined} */
let initialContent
const updates = []
for (let operation of this.operations) {
if (!('textOperation' in operation.operation)) {
// We only care about text operations
continue
}
let authors, ops, timestamp, version
;({ authors, timestamp, version, operation } = operation)
// Set the initialContent to the latest version we have before the diff
// begins. 'version' here refers to the document version as we are
// applying the updates. So we store the content *before* applying the
// updates.
if (version >= fromVersion && initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(
content,
trackedChanges
)
}
;({ content, ops } = this._convertTextOperation(
content,
operation,
trackedChanges
))
// We only need to return the updates between fromVersion and toVersion
if (fromVersion <= version && version < toVersion) {
updates.push({
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: version,
op: ops,
})
}
}
if (initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
}
callback(null, { initialContent, updates })
})
}
/**
*
* @param {string} initialContent
* @param {RawEditOperation} operation
* @param {TrackedChangeList} trackedChanges
*/
_convertTextOperation(initialContent, operation, trackedChanges) {
const textOp = TextOperation.fromJSON(operation)
const textUpdateBuilder = new TextUpdateBuilder(
initialContent,
trackedChanges
)
for (const op of textOp.ops) {
textUpdateBuilder.applyOp(op)
}
textUpdateBuilder.finish()
return {
content: textUpdateBuilder.result,
ops: textUpdateBuilder.changes,
}
}
_loadContentAndRanges(historyId, callback) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.hash,
(err, content) => {
if (err) {
return callback(err)
}
if (this.snapshot.rangesHash) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.rangesHash,
(err, ranges) => {
if (err) {
return callback(err)
}
return callback(null, content, JSON.parse(ranges))
}
)
} else {
return callback(null, content, undefined)
}
}
)
}
}
class TextUpdateBuilder {
/**
*
* @param {string} source
* @param {TrackedChangeList} ranges
*/
constructor(source, ranges) {
this.trackedChanges = ranges
this.source = source
this.sourceCursor = 0
this.result = ''
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
this.changes = []
}
applyOp(op) {
if (op instanceof RetainOp) {
const length = this.result.length
this.applyRetain(op)
this.trackedChanges.applyRetain(length, op.length, {
tracking: op.tracking,
})
}
if (op instanceof InsertOp) {
const length = this.result.length
this.applyInsert(op)
this.trackedChanges.applyInsert(length, op.insertion, {
tracking: op.tracking,
})
}
if (op instanceof RemoveOp) {
const length = this.result.length
this.applyDelete(op)
this.trackedChanges.applyDelete(length, op.length)
}
}
/**
*
* @param {RetainOp} retain
*/
applyRetain(retain) {
const rangeOfRetention = new Range(this.sourceCursor, retain.length)
let cursor = this.sourceCursor
if (retain.tracking) {
// We are modifying existing tracked deletes. We need to treat removal
// (type insert/none) of a tracked delete as an insertion. Similarly, any
// range we introduce as a tracked deletion must be reported as a deletion.
const trackedDeletes = this.trackedChanges.trackedChanges.filter(
tc =>
tc.tracking.type === 'delete' && tc.range.overlaps(rangeOfRetention)
)
for (const trackedDelete of trackedDeletes) {
if (cursor < trackedDelete.range.start) {
if (retain.tracking.type === 'delete') {
this.changes.push({
d: this.source.slice(cursor, trackedDelete.range.start),
p: this.result.length,
})
}
this.result += this.source.slice(cursor, trackedDelete.range.start)
cursor = trackedDelete.range.start
}
const endOfInsertion = Math.min(
trackedDelete.range.end,
rangeOfRetention.end
)
const text = this.source.slice(cursor, endOfInsertion)
if (
retain.tracking.type === 'none' ||
retain.tracking.type === 'insert'
) {
this.changes.push({
i: text,
p: this.result.length,
})
}
this.result += text
cursor = endOfInsertion
if (cursor >= rangeOfRetention.end) {
break
}
}
}
if (cursor < rangeOfRetention.end) {
// The last region is not a tracked delete. But we should still handle
// a new tracked delete as a deletion.
const text = this.source.slice(cursor, rangeOfRetention.end)
if (retain.tracking?.type === 'delete') {
this.changes.push({
d: text,
p: this.result.length,
})
}
this.result += text
}
this.sourceCursor += retain.length
}
/**
*
* @param {InsertOp} insert
*/
applyInsert(insert) {
if (insert.tracking?.type !== 'delete') {
// Skip tracked deletions
this.changes.push({
i: insert.insertion,
p: this.result.length,
})
}
this.result += insert.insertion
// The source cursor doesn't advance
}
/**
*
* @param {RemoveOp} deletion
*/
applyDelete(deletion) {
const rangeOfDeletion = new Range(this.sourceCursor, deletion.length)
const trackedDeletes = this.trackedChanges.trackedChanges
.filter(
tc =>
tc.tracking.type === 'delete' && tc.range.overlaps(rangeOfDeletion)
)
.sort((a, b) => a.range.start - b.range.start)
let cursor = this.sourceCursor
for (const trackedDelete of trackedDeletes) {
if (cursor < trackedDelete.range.start) {
this.changes.push({
d: this.source.slice(cursor, trackedDelete.range.start),
p: this.result.length,
})
}
// skip the tracked delete itself
cursor = Math.min(trackedDelete.range.end, rangeOfDeletion.end)
if (cursor >= rangeOfDeletion.end) {
break
}
}
if (cursor < rangeOfDeletion.end) {
this.changes.push({
d: this.source.slice(cursor, rangeOfDeletion.end),
p: this.result.length,
})
}
this.sourceCursor = rangeOfDeletion.end
}
finish() {
if (this.sourceCursor < this.source.length) {
this.result += this.source.slice(this.sourceCursor)
}
for (const op of this.changes) {
if ('p' in op && typeof op.p === 'number') {
// Maybe we have to move the position of the deletion to account for
// tracked changes that we're hiding in the UI.
op.p -= this.trackedChanges.trackedChanges
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
.map(tc => {
if (tc.range.end < op.p) {
return tc.range.length
}
return op.p - tc.range.start
})
.reduce((a, b) => a + b, 0)
}
}
}
}