Merge pull request #17196 from overleaf/mj-tracked-deletes-filtering

[project-history] Filter tracked deletes from diff views

GitOrigin-RevId: 32c49a740932ef28534b82d390fe00e6500864ca
This commit is contained in:
Mathias Jakobsen 2024-03-12 12:13:40 +00:00 committed by Copybot
parent 3e701f1388
commit 4ef7bc617b
3 changed files with 846 additions and 93 deletions

View file

@ -29,6 +29,15 @@ const safePathname = require('./lib/safe_pathname')
const Snapshot = require('./lib/snapshot')
const util = require('./lib/util')
const V2DocVersions = require('./lib/v2_doc_versions')
const {
InsertOp,
RemoveOp,
RetainOp,
ScanOp,
} = require('./lib/operation/scan_op')
const TrackedChange = require('./lib/file_data/tracked_change')
const TrackedChangeList = require('./lib/file_data/tracked_change_list')
const Range = require('./lib/range')
exports.AddCommentOperation = AddCommentOperation
exports.Author = Author
@ -61,3 +70,10 @@ exports.safePathname = safePathname
exports.Snapshot = Snapshot
exports.util = util
exports.V2DocVersions = V2DocVersions
exports.ScanOp = ScanOp
exports.InsertOp = InsertOp
exports.RetainOp = RetainOp
exports.RemoveOp = RemoveOp
exports.TrackedChangeList = TrackedChangeList
exports.TrackedChange = TrackedChange
exports.Range = Range

View file

@ -4,6 +4,19 @@ import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import {
TextOperation,
InsertOp,
RemoveOp,
RetainOp,
Range,
TrackedChangeList,
} from 'overleaf-editor-core'
/**
* @typedef {import('overleaf-editor-core/lib/types').RawEditOperation} RawEditOperation
* @typedef {import('overleaf-editor-core/lib/types').TrackedChangeRawData} TrackedChangeRawData
*/
export function convertToSummarizedUpdates(chunk, callback) {
const version = chunk.chunk.startVersion
@ -242,6 +255,28 @@ class UpdateSetBuilder {
}
}
/**
* @param {string} content
* @param {TrackedChangeList} trackedChanges
* @returns {string}
*/
function removeTrackedDeletesFromString(content, trackedChanges) {
let result = ''
let cursor = 0
const trackedDeletes = trackedChanges.trackedChanges.filter(
tc => tc.tracking.type === 'delete'
)
for (const trackedChange of trackedDeletes) {
if (cursor < trackedChange.range.start) {
result += content.slice(cursor, trackedChange.range.start)
}
// skip the tracked change itself
cursor = trackedChange.range.end
}
result += content.slice(cursor)
return result
}
class File {
constructor(pathname, snapshot, initialVersion) {
this.pathname = pathname
@ -263,49 +298,75 @@ class File {
// Binary file
return callback(null, { binary: true })
}
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.hash,
(error, content) => {
if (error != null) {
return callback(OError.tag(error))
}
let initialContent = content
const updates = []
for (let operation of this.operations) {
let authors, ops, timestamp, version
;({ authors, timestamp, version, operation } = operation)
;({ content, ops } = this._convertTextOperation(content, operation))
// Keep updating our initialContent, until we're actually in the version
// we want to diff, at which point initialContent is the content just before
// the diff updates we will return
if (version < fromVersion) {
initialContent = content
}
// We only need to return the updates between fromVersion and toVersion
if (fromVersion <= version && version < toVersion) {
updates.push({
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: version,
op: ops,
})
}
}
callback(null, { initialContent, updates })
this._loadContentAndRanges(historyId, (error, content, ranges) => {
if (error != null) {
return callback(OError.tag(error))
}
)
const trackedChanges = TrackedChangeList.fromRaw(
ranges?.trackedChanges || []
)
/** @type {string | undefined} */
let initialContent
const updates = []
for (let operation of this.operations) {
if (!('textOperation' in operation.operation)) {
// We only care about text operations
continue
}
let authors, ops, timestamp, version
;({ authors, timestamp, version, operation } = operation)
// Set the initialContent to the latest version we have before the diff
// begins. 'version' here refers to the document version as we are
// applying the updates. So we store the content *before* applying the
// updates.
if (version >= fromVersion && initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(
content,
trackedChanges
)
}
;({ content, ops } = this._convertTextOperation(
content,
operation,
trackedChanges
))
// We only need to return the updates between fromVersion and toVersion
if (fromVersion <= version && version < toVersion) {
updates.push({
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: version,
op: ops,
})
}
}
if (initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
}
callback(null, { initialContent, updates })
})
}
_convertTextOperation(content, operation) {
const textUpdateBuilder = new TextUpdateBuilder(content)
for (const op of operation.textOperation || []) {
/**
*
* @param {string} initialContent
* @param {RawEditOperation} operation
* @param {TrackedChangeList} trackedChanges
*/
_convertTextOperation(initialContent, operation, trackedChanges) {
const textOp = TextOperation.fromJSON(operation)
const textUpdateBuilder = new TextUpdateBuilder(
initialContent,
trackedChanges
)
for (const op of textOp.ops) {
textUpdateBuilder.applyOp(op)
}
textUpdateBuilder.finish()
@ -314,76 +375,204 @@ class File {
ops: textUpdateBuilder.changes,
}
}
_loadContentAndRanges(historyId, callback) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.hash,
(err, content) => {
if (err) {
return callback(err)
}
if (this.snapshot.rangesHash) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.rangesHash,
(err, ranges) => {
if (err) {
return callback(err)
}
return callback(null, content, JSON.parse(ranges))
}
)
} else {
return callback(null, content, undefined)
}
}
)
}
}
class TextUpdateBuilder {
constructor(source) {
/**
*
* @param {string} source
* @param {TrackedChangeList} ranges
*/
constructor(source, ranges) {
this.trackedChanges = ranges
this.source = source
this.sourceCursor = 0
this.result = ''
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
this.changes = []
}
applyOp(op) {
if (TextUpdateBuilder._isRetainOperation(op)) {
if (op instanceof RetainOp) {
const length = this.result.length
this.applyRetain(op)
this.trackedChanges.applyRetain(length, op.length, {
tracking: op.tracking,
})
}
if (TextUpdateBuilder._isInsertOperation(op)) {
if (op instanceof InsertOp) {
const length = this.result.length
this.applyInsert(op)
this.trackedChanges.applyInsert(length, op.insertion, {
tracking: op.tracking,
})
}
if (TextUpdateBuilder._isDeleteOperation(op)) {
this.applyDelete(-op)
if (op instanceof RemoveOp) {
const length = this.result.length
this.applyDelete(op)
this.trackedChanges.applyDelete(length, op.length)
}
}
applyRetain(offset) {
this.result += this.source.slice(
this.sourceCursor,
this.sourceCursor + offset
)
this.sourceCursor += offset
/**
*
* @param {RetainOp} retain
*/
applyRetain(retain) {
const rangeOfRetention = new Range(this.sourceCursor, retain.length)
let cursor = this.sourceCursor
if (retain.tracking) {
// We are modifying existing tracked deletes. We need to treat removal
// (type insert/none) of a tracked delete as an insertion. Similarly, any
// range we introduce as a tracked deletion must be reported as a deletion.
const trackedDeletes = this.trackedChanges.trackedChanges.filter(
tc =>
tc.tracking.type === 'delete' && tc.range.overlaps(rangeOfRetention)
)
for (const trackedDelete of trackedDeletes) {
if (cursor < trackedDelete.range.start) {
if (retain.tracking.type === 'delete') {
this.changes.push({
d: this.source.slice(cursor, trackedDelete.range.start),
p: this.result.length,
})
}
this.result += this.source.slice(cursor, trackedDelete.range.start)
cursor = trackedDelete.range.start
}
const endOfInsertion = Math.min(
trackedDelete.range.end,
rangeOfRetention.end
)
const text = this.source.slice(cursor, endOfInsertion)
if (
retain.tracking.type === 'none' ||
retain.tracking.type === 'insert'
) {
this.changes.push({
i: text,
p: this.result.length,
})
}
this.result += text
cursor = endOfInsertion
if (cursor >= rangeOfRetention.end) {
break
}
}
}
if (cursor < rangeOfRetention.end) {
// The last region is not a tracked delete. But we should still handle
// a new tracked delete as a deletion.
const text = this.source.slice(cursor, rangeOfRetention.end)
if (retain.tracking?.type === 'delete') {
this.changes.push({
d: text,
p: this.result.length,
})
}
this.result += text
}
this.sourceCursor += retain.length
}
applyInsert(content) {
this.changes.push({
i: content,
p: this.result.length,
})
this.result += content
/**
*
* @param {InsertOp} insert
*/
applyInsert(insert) {
if (insert.tracking?.type !== 'delete') {
// Skip tracked deletions
this.changes.push({
i: insert.insertion,
p: this.result.length,
})
}
this.result += insert.insertion
// The source cursor doesn't advance
}
applyDelete(offset) {
const deletedContent = this.source.slice(
this.sourceCursor,
this.sourceCursor + offset
)
this.changes.push({
d: deletedContent,
p: this.result.length,
})
this.sourceCursor += offset
/**
*
* @param {RemoveOp} deletion
*/
applyDelete(deletion) {
const rangeOfDeletion = new Range(this.sourceCursor, deletion.length)
const trackedDeletes = this.trackedChanges.trackedChanges
.filter(
tc =>
tc.tracking.type === 'delete' && tc.range.overlaps(rangeOfDeletion)
)
.sort((a, b) => a.range.start - b.range.start)
let cursor = this.sourceCursor
for (const trackedDelete of trackedDeletes) {
if (cursor < trackedDelete.range.start) {
this.changes.push({
d: this.source.slice(cursor, trackedDelete.range.start),
p: this.result.length,
})
}
// skip the tracked delete itself
cursor = Math.min(trackedDelete.range.end, rangeOfDeletion.end)
if (cursor >= rangeOfDeletion.end) {
break
}
}
if (cursor < rangeOfDeletion.end) {
this.changes.push({
d: this.source.slice(cursor, rangeOfDeletion.end),
p: this.result.length,
})
}
this.sourceCursor = rangeOfDeletion.end
}
finish() {
if (this.sourceCursor < this.source.length) {
this.result += this.source.slice(this.sourceCursor)
}
}
static _isRetainOperation(op) {
return typeof op === 'number' && op > 0
}
static _isInsertOperation(op) {
return typeof op === 'string'
}
static _isDeleteOperation(op) {
return typeof op === 'number' && op < 0
for (const op of this.changes) {
if ('p' in op && typeof op.p === 'number') {
// Maybe we have to move the position of the deletion to account for
// tracked changes that we're hiding in the UI.
op.p -= this.trackedChanges.trackedChanges
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
.map(tc => {
if (tc.range.end < op.p) {
return tc.range.length
}
return op.p - tc.range.start
})
.reduce((a, b) => a + b, 0)
}
}
}
}

View file

@ -275,9 +275,13 @@ describe('ChunkTranslator', function () {
op: [
{ i: '111 ', p: 0 },
{ d: 'aa ', p: 4 },
{ i: '2222 ', p: 10 },
{ d: 'c', p: 15 },
{ i: 'd', p: 15 },
// NOTE: The construction of TextOperation can merge an
// insertion across a deletion operation, which is why this is
// ever so slightly different from the textOperation defined
// in the chunk. Both diffs represent the same change in
// content.
{ i: '2222 d', p: 10 },
{ d: 'c', p: 16 },
],
meta: {
users: [this.author1.id],
@ -811,9 +815,7 @@ describe('ChunkTranslator', function () {
},
changes: [
{
operations: [
{ pathname: 'main.tex', textOperation: [0, 'foo'] },
],
operations: [{ pathname: 'main.tex', textOperation: ['foo'] }],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
@ -1124,9 +1126,7 @@ describe('ChunkTranslator', function () {
},
changes: [
{
operations: [
{ pathname: 'main.tex', textOperation: [0, 'foo'] },
],
operations: [{ pathname: 'main.tex', textOperation: ['foo'] }],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
@ -1136,9 +1136,7 @@ describe('ChunkTranslator', function () {
authors: [this.author1.id],
},
{
operations: [
{ pathname: 'other.tex', textOperation: [0, 'foo'] },
],
operations: [{ pathname: 'other.tex', textOperation: ['foo'] }],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
@ -1731,4 +1729,554 @@ describe('ChunkTranslator', function () {
})
})
})
describe('with tracked changes in a file', function () {
describe('convertToDiffUpdates', function () {
beforeEach(function () {
this.rangesHash = 'some_ranges_hash'
this.fileContents = 'Hello planet world, this is a test'
this.ranges = JSON.stringify({
trackedChanges: [
{
range: { pos: 6, length: 7 },
tracking: {
type: 'delete',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
},
],
})
this.HistoryStoreManager.getProjectBlob
.withArgs(this.historyId, this.rangesHash)
.yields(null, this.ranges)
this.HistoryStoreManager.getProjectBlob
.withArgs(this.historyId, this.fileHash)
.yields(null, this.fileContents)
})
it('should filter out the tracked deletes that were present in the chunk', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
28, // Hello [planet ]world, this is |a test
-1, // Hello [planet ]world, this is | test
'the', // Hello [planet ]world, this is the| test
5, // Hello [planet ]world, this is the test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [
{ i: 'the', p: 21 },
{ d: 'a', p: 24 },
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should filter out tracked deletes across multiple changes', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
28, // Hello [planet ]world, this is |a test
-1, // Hello [planet ]world, this is | test
'the', // Hello [planet ]world, this is the| test
5, // Hello [planet ]world, this is the test|
],
},
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
22, // Hello [planet ]world, th|is is the test
-2, // Hello [planet ]world, th| is the test
'at', // Hello [planet ]world, that| is the test
12, // Hello [planet ]world, that is the test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [
{ i: 'the', p: 21 },
{ d: 'a', p: 24 },
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
{
op: [
{ i: 'at', p: 15 },
{ d: 'is', p: 17 },
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should handle tracked delete in the operation', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
5, // Hello| [planet ]world, this is a test
{
r: 1,
tracking: {
type: 'delete',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // Hello[ ]|[planet ]world, this is test
7, // Hello[ ][planet ]|world, this is the test
{
r: 5,
tracking: {
type: 'delete',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // Hello[ ][planet ][world]|, this is the test
18, // Hello[ ][planet ][world], this is the test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [
{ d: ' ', p: 5 },
{ d: 'world', p: 5 },
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should filter out tracked deletes in insert operations', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
13, // Hello [planet ]|world, this is a test
{
i: 'pluto',
tracking: {
type: 'delete',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // Hello [planet pluto]|world, this is a test
21, // Hello [planet pluto]world, this is a test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should filter out tracked deletes in delete operations', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
6, // Hello |[planet ]world, this is a test
-3, // Hello [|net ]world, this is a test
6, // Hello [net ]wo|rld, this is a test
-3, // Hello [net ]wo|, this is a test
16, // Hello [net ]wo, this is a test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [{ d: 'rld', p: 8 }],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should filter out tracked deletes in retain operations', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
4, // Hell|o [planet ]world, this is a test
{
r: 4,
tracking: {
type: 'none',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // Hello pl|[anet ]world, this is a test
{
r: 3,
tracking: {
type: 'insert',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // Hello plane|[t ]world, this is a test
23, // Hello plane[t ]world, this is a test|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [
{
i: 'pl',
p: 6,
},
{
i: 'ane',
p: 8,
},
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
it('should report tracked deletion (retains) as deletions', function (done) {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {
'main.tex': {
hash: this.fileHash,
rangesHash: this.rangesHash,
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'main.tex',
textOperation: [
// [...] is a tracked delete
{
r: 34,
tracking: {
type: 'delete',
userId: this.author1.id,
ts: '2024-01-01T00:00:00.000Z',
},
}, // [Hello planet world, this is a test]|
],
},
],
timestamp: this.date.toISOString(),
authors: [this.author1.id],
},
],
},
},
authors: [this.author1.id],
}
this.ChunkTranslator.convertToDiffUpdates(
this.projectId,
this.chunk,
'main.tex',
0,
1,
(error, diff) => {
expect(error).to.be.null
expect(diff.initialContent).to.equal('Hello world, this is a test')
expect(diff.updates).to.deep.equal([
{
op: [
{
d: 'Hello ',
p: 0,
},
{
d: 'world, this is a test',
p: 0,
},
],
meta: {
users: [this.author1.id],
start_ts: this.date.getTime(),
end_ts: this.date.getTime(),
},
v: 0,
},
])
done()
}
)
})
})
})
})