Merge pull request #17859 from overleaf/mj-project-history-ranges-at-version

[overleaf-editor-core+project-history] Add endpoint to fetch ranges from version

GitOrigin-RevId: fbe8e8ef3636b344006375a92057cfc580a74616
This commit is contained in:
Mathias Jakobsen 2024-04-19 14:05:27 +01:00 committed by Copybot
parent cccd0f06d7
commit 36ad15c405
8 changed files with 594 additions and 0 deletions

View file

@ -17,6 +17,7 @@ const StringFileData = require('./file_data/string_file_data')
* @typedef {import("./types").CommentRawData} CommentRawData
* @typedef {import("./file_data/comment_list")} CommentList
* @typedef {import("./operation/text_operation")} TextOperation
* @typedef {import("./file_data/tracked_change_list")} TrackedChangeList
* @typedef {{filterTrackedDeletes?: boolean}} FileGetContentOptions
*/
@ -210,6 +211,14 @@ class File {
return this.data.getComments()
}
/**
* Get the tracked changes for this file.
* @return {TrackedChangeList}
*/
getTrackedChanges() {
return this.data.getTrackedChanges()
}
/**
* Clone a file.
*

View file

@ -21,6 +21,7 @@ let StringFileData = null
* @typedef {import("../operation/edit_operation")} EditOperation
* @typedef {import("../file_data/comment_list")} CommentList
* @typedef {import("../types").CommentRawData} CommentRawData
* @typedef {import("../file_data/tracked_change_list")} TrackedChangeList
*/
/**
@ -199,6 +200,18 @@ class FileData {
getComments() {
throw new Error('getComments not implemented for ' + JSON.stringify(this))
}
/**
* @see File#getTrackedChanges
* @function
* @return {TrackedChangeList}
* @abstract
*/
getTrackedChanges() {
throw new Error(
'getTrackedChanges not implemented for ' + JSON.stringify(this)
)
}
}
module.exports = FileData

View file

@ -112,6 +112,11 @@ class StringFileData extends FileData {
return this.comments
}
/** @inheritdoc */
getTrackedChanges() {
return this.trackedChanges
}
/**
* @inheritdoc
* @returns {Promise<StringFileData>}

View file

@ -201,6 +201,21 @@ export function getFileSnapshot(req, res, next) {
)
}
export function getRangesSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getRangesSnapshot(
projectId,
version,
pathname,
(err, ranges) => {
if (err) {
return next(OError.tag(err))
}
res.json(ranges)
}
)
}
export function getProjectSnapshot(req, res, next) {
const { project_id: projectId, version } = req.params
SnapshotManager.getProjectSnapshot(

View file

@ -143,6 +143,11 @@ export function initialize(app) {
HttpController.getFileSnapshot
)
app.get(
'/project/:project_id/ranges/version/:version/:pathname',
HttpController.getRangesSnapshot
)
app.get(
'/project/:project_id/version/:version',
HttpController.getProjectSnapshot

View file

@ -11,6 +11,7 @@ import * as Errors from './Errors.js'
/**
* @typedef {import('stream').Readable} ReadableStream
* @typedef {import('overleaf-editor-core').Snapshot} Snapshot
* @typedef {import('./types').RangesSnapshot} RangesSnapshot
*/
StringStream.prototype._read = function () {}
@ -50,6 +51,151 @@ async function getFileSnapshotStream(projectId, version, pathname) {
}
}
/**
* Constructs a snapshot of the ranges in a document-updater compatible format.
* Positions will be relative to a document where tracked deletes have been
* removed from the string. This also means that if a tracked delete overlaps
* a comment range, the comment range will be truncated.
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
* @returns {Promise<RangesSnapshot>}
*/
async function getRangesSnapshot(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (!file) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
if (!file.isEditable()) {
throw new Error('File is not editable')
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
const content = file.getContent()
if (!content) {
throw new Error('Unable to read file contents')
}
const trackedChanges = file.getTrackedChanges().asSorted()
const comments = file.getComments()
const docUpdaterCompatibleTrackedChanges = []
let trackedDeletionOffset = 0
for (const trackedChange of trackedChanges) {
const isTrackedDeletion = trackedChange.tracking.type === 'delete'
const trackedChangeContent = content.slice(
trackedChange.range.start,
trackedChange.range.end
)
const tcContent = isTrackedDeletion
? { d: trackedChangeContent }
: { i: trackedChangeContent }
docUpdaterCompatibleTrackedChanges.push({
op: {
p: trackedChange.range.start - trackedDeletionOffset,
...tcContent,
},
metadata: {
ts: trackedChange.tracking.ts.toISOString(),
user_id: trackedChange.tracking.userId,
},
})
if (isTrackedDeletion) {
trackedDeletionOffset += trackedChange.range.length
}
}
// Comments are shifted left by the length of any previous tracked deletions.
// If they overlap with a tracked deletion, they are truncated.
//
// Example:
// { } comment
// [ ] tracked deletion
// the quic[k {b]rown [fox] jum[ps} ove]r the lazy dog
// => rown jum
// starting at position 8
const trackedDeletions = trackedChanges.filter(
tc => tc.tracking.type === 'delete'
)
const docUpdaterCompatibleComments = []
for (const comment of comments) {
trackedDeletionOffset = 0
let trackedDeletionIndex = 0
for (const commentRange of comment.ranges) {
let commentRangeContent = ''
let offsetFromOverlappingRangeAtStart = 0
while (
trackedDeletionIndex < trackedDeletions.length &&
trackedDeletions[trackedDeletionIndex].range.start <
commentRange.start &&
trackedDeletions[trackedDeletionIndex].range.end <= commentRange.start
) {
// Skip over tracked deletions that are before the current comment range
trackedDeletionOffset +=
trackedDeletions[trackedDeletionIndex].range.length
trackedDeletionIndex++
}
if (
trackedDeletions[trackedDeletionIndex]?.range.start < commentRange.start
) {
// There's overlap with a tracked deletion, move the position left and
// truncate the overlap
offsetFromOverlappingRangeAtStart =
commentRange.start -
trackedDeletions[trackedDeletionIndex].range.start
}
// The position of the comment in the document after tracked deletions
const position =
commentRange.start -
trackedDeletionOffset -
offsetFromOverlappingRangeAtStart
let cursor = commentRange.start
while (cursor < commentRange.end) {
const trackedDeletion = trackedDeletions[trackedDeletionIndex]
if (
!trackedDeletion ||
trackedDeletion.range.start >= commentRange.end
) {
// We've run out of relevant tracked changes
commentRangeContent += content.slice(cursor, commentRange.end)
break
}
if (trackedDeletion.range.start > cursor) {
// There's a gap between the current cursor and the tracked deletion
commentRangeContent += content.slice(
cursor,
trackedDeletion.range.start
)
}
// Skip to the end of the tracked delete
cursor = trackedDeletion.range.end
trackedDeletionIndex++
trackedDeletionOffset += trackedDeletion.range.length
}
docUpdaterCompatibleComments.push({
op: {
p: position,
c: commentRangeContent,
t: comment.id,
},
})
}
}
return {
changes: docUpdaterCompatibleTrackedChanges,
comments: docUpdaterCompatibleComments,
}
}
// Returns project snapshot containing the document content for files with
// text operations in the relevant chunk, and hashes for unmodified/binary
// files. Used by git bridge to get the state of the project.
@ -135,15 +281,18 @@ async function _loadFilesLimit(snapshot, kind, blobStore) {
const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream)
const getProjectSnapshotCb = callbackify(getProjectSnapshot)
const getLatestSnapshotCb = callbackify(getLatestSnapshot)
const getRangesSnapshotCb = callbackify(getRangesSnapshot)
export {
getFileSnapshotStreamCb as getFileSnapshotStream,
getProjectSnapshotCb as getProjectSnapshot,
getLatestSnapshotCb as getLatestSnapshot,
getRangesSnapshotCb as getRangesSnapshot,
}
export const promises = {
getFileSnapshotStream,
getProjectSnapshot,
getLatestSnapshot,
getRangesSnapshot,
}

View file

@ -123,3 +123,26 @@ export type RawScanOp =
| { r: number; tracking?: TrackingProps }
| { i: string; tracking?: TrackingProps; commentIds?: string[] }
| { d: number; tracking?: TrackingProps }
export type TrackedChangeSnapshot = {
op: {
p: number
} & ({ d: string } | { i: string })
metadata: {
ts: string
user_id: string
}
}
export type CommentSnapshot = {
op: {
p: number
t: string
c: string
}
}
export type RangesSnapshot = {
changes: TrackedChangeSnapshot[]
comments: CommentSnapshot[]
}

View file

@ -579,4 +579,379 @@ Four five six\
})
})
})
describe('getRangesSnapshot', function () {
beforeEach(async function () {
this.WebApiManager.promises.getHistoryId.resolves(this.historyId)
this.HistoryStoreManager.promises.getChunkAtVersion.resolves({
chunk: (this.chunk = {
history: {
snapshot: {
files: {
'main.tex': {
hash: (this.fileHash =
'5d2781d78fa5a97b7bafa849fe933dfc9dc93eba'),
rangesHash: (this.rangesHash =
'73061952d41ce54825e2fc1c36b4cf736d5fb62f'),
stringLength: 41,
},
},
},
changes: [],
},
startVersion: 1,
authors: [
{
id: 31,
email: 'author@example.com',
name: 'Author',
},
],
}),
})
this.HistoryStoreManager.getBlobStore.withArgs(this.historyId).returns({
getString: (this.getString = sinon.stub()),
getObject: (this.getObject = sinon.stub()),
})
this.getString.resolves('the quick brown fox jumps over the lazy dog')
})
describe('with tracked deletes', function () {
beforeEach(async function () {
this.getObject.resolves({
trackedChanges: [
{
// 'quick '
range: {
pos: 4,
length: 6,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'fox '
range: {
pos: 16,
length: 4,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'lazy '
range: {
pos: 35,
length: 5,
},
tracking: {
type: 'insert',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'dog'
range: {
pos: 40,
length: 3,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
})
this.data = await this.SnapshotManager.promises.getRangesSnapshot(
this.projectId,
1,
'main.tex'
)
})
it("doesn't shift the tracked delete by itself", function () {
expect(this.data.changes[0].op.p).to.eq(4)
})
it('should move subsequent tracked changes by the length of previous deletes', function () {
expect(this.data.changes[1].op.p).to.eq(16 - 6)
expect(this.data.changes[2].op.p).to.eq(35 - 6 - 4)
})
it("shouldn't move subsequent tracked changes by previous inserts", function () {
expect(this.data.changes[3].op.p).to.eq(40 - 6 - 4)
})
})
describe('with comments and tracked deletes', function () {
beforeEach(async function () {
this.getObject.resolves({
// the quick brown fox jumps over the lazy dog
trackedChanges: [
{
// 'e qui'
range: {
pos: 2,
length: 5,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'r'
range: {
pos: 11,
length: 1,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'er th'
range: {
pos: 28,
length: 5,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
comments: [
{
id: 'comment-1',
ranges: [
// 'quick'
{
pos: 4,
length: 5,
},
// 'brown'
{
pos: 10,
length: 5,
},
// 'over'
{
pos: 26,
length: 4,
},
],
resolved: false,
},
],
})
this.data = await this.SnapshotManager.promises.getRangesSnapshot(
this.projectId,
1,
'main.tex'
)
})
it('should move the comment to the start of the tracked delete and remove overlapping text', function () {
expect(this.data.comments[0].op.p).to.eq(2)
expect(this.data.comments[0].op.c).to.eq('ck')
})
it('should remove overlapping text in middle of comment', function () {
expect(this.data.comments[1].op.p).to.eq(5)
expect(this.data.comments[1].op.c).to.eq('bown')
})
it('should remove overlapping text at end of comment', function () {
expect(this.data.comments[2].op.p).to.eq(20)
expect(this.data.comments[2].op.c).to.eq('ov')
})
})
describe('with multiple tracked changes and comments', function () {
beforeEach(async function () {
this.getObject.resolves({
trackedChanges: [
{
// 'quick '
range: {
pos: 4,
length: 6,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
// 'brown '
range: {
pos: 10,
length: 6,
},
tracking: {
type: 'insert',
userId: '31',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
// 'lazy '
range: {
pos: 35,
length: 5,
},
tracking: {
type: 'delete',
userId: '31',
ts: '2024-01-01T00:00:00.000Z',
},
},
],
comments: [
{
id: 'comment-1',
// 'quick', 'brown', 'lazy'
ranges: [
{
pos: 4,
length: 5,
},
{
pos: 10,
length: 5,
},
{
pos: 35,
length: 4,
},
],
resolved: false,
},
{
id: 'comment-2',
// 'the', 'the'
ranges: [
{
pos: 0,
length: 3,
},
{
pos: 31,
length: 3,
},
],
resolved: true,
},
],
})
this.data = await this.SnapshotManager.promises.getRangesSnapshot(
this.projectId,
1,
'main.tex'
)
})
it('looks up ranges', function () {
expect(this.getObject).to.have.been.calledWith(this.rangesHash)
expect(this.getString).to.have.been.calledWith(this.fileHash)
})
it('should get the chunk', function () {
expect(
this.HistoryStoreManager.promises.getChunkAtVersion
).to.have.been.calledWith(this.projectId, this.historyId, 1)
})
it('returns the ranges with content and adjusted positions to ignore tracked deletes', function () {
expect(this.data).to.deep.equal({
changes: [
{
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: '31',
},
op: {
d: 'quick ',
p: 4,
},
},
{
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: '31',
},
op: {
i: 'brown ',
p: 4,
},
},
{
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: '31',
},
op: {
d: 'lazy ',
p: 29,
},
},
],
comments: [
{
op: {
c: '',
p: 4,
t: 'comment-1',
},
},
{
op: {
c: 'brown',
p: 4,
t: 'comment-1',
},
},
{
op: {
c: '',
p: 29,
t: 'comment-1',
},
},
{
op: {
c: 'the',
p: 0,
t: 'comment-2',
},
},
{
op: {
c: 'the',
p: 25,
t: 'comment-2',
},
},
],
})
})
})
})
})