mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #17359 from overleaf/mj-download-filtering
[overleaf-editor-core+history-v1] Filter tracked deletes when creating zip files GitOrigin-RevId: 1c19d3cb849150d33e750772399ea81c280fdd57
This commit is contained in:
parent
4ef7bc617b
commit
ce19b5f568
10 changed files with 247 additions and 87 deletions
|
@ -14,6 +14,7 @@ const StringFileData = require('./file_data/string_file_data')
|
||||||
* @typedef {import("./types").StringFileRawData} StringFileRawData
|
* @typedef {import("./types").StringFileRawData} StringFileRawData
|
||||||
* @typedef {import("./types").CommentRawData} CommentRawData
|
* @typedef {import("./types").CommentRawData} CommentRawData
|
||||||
* @typedef {import("./operation/text_operation")} TextOperation
|
* @typedef {import("./operation/text_operation")} TextOperation
|
||||||
|
* @typedef {{filterTrackedDeletes?: boolean}} FileGetContentOptions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class NotEditableError extends OError {
|
class NotEditableError extends OError {
|
||||||
|
@ -133,10 +134,11 @@ class File {
|
||||||
* The content of the file, if it is known and if this file has UTF-8 encoded
|
* The content of the file, if it is known and if this file has UTF-8 encoded
|
||||||
* content.
|
* content.
|
||||||
*
|
*
|
||||||
|
* @param {FileGetContentOptions} [opts]
|
||||||
* @return {string | null | undefined}
|
* @return {string | null | undefined}
|
||||||
*/
|
*/
|
||||||
getContent() {
|
getContent(opts = {}) {
|
||||||
return this.data.getContent()
|
return this.data.getContent(opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -90,6 +90,9 @@ class HashFileData extends FileData {
|
||||||
let rangesBlob
|
let rangesBlob
|
||||||
if (this.rangesHash) {
|
if (this.rangesHash) {
|
||||||
rangesBlob = await blobStore.getBlob(this.rangesHash)
|
rangesBlob = await blobStore.getBlob(this.rangesHash)
|
||||||
|
if (!rangesBlob) {
|
||||||
|
throw new Error('Failed to look up rangesHash in blobStore')
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!blob) throw new Error('blob not found: ' + this.hash)
|
if (!blob) throw new Error('blob not found: ' + this.hash)
|
||||||
return FileData.createLazyFromBlobs(blob, rangesBlob)
|
return FileData.createLazyFromBlobs(blob, rangesBlob)
|
||||||
|
@ -102,6 +105,9 @@ class HashFileData extends FileData {
|
||||||
*/
|
*/
|
||||||
async toHollow(blobStore) {
|
async toHollow(blobStore) {
|
||||||
const blob = await blobStore.getBlob(this.hash)
|
const blob = await blobStore.getBlob(this.hash)
|
||||||
|
if (!blob) {
|
||||||
|
throw new Error('Failed to look up hash in blobStore')
|
||||||
|
}
|
||||||
return FileData.createHollow(blob.getByteLength(), blob.getStringLength())
|
return FileData.createHollow(blob.getByteLength(), blob.getStringLength())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -90,9 +90,10 @@ class FileData {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see File#getContent
|
* @see File#getContent
|
||||||
|
* @param {import('../file').FileGetContentOptions} [opts]
|
||||||
* @return {string | null | undefined}
|
* @return {string | null | undefined}
|
||||||
*/
|
*/
|
||||||
getContent() {
|
getContent(opts = {}) {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -66,9 +66,29 @@ class StringFileData extends FileData {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @inheritdoc */
|
/**
|
||||||
getContent() {
|
* @inheritdoc
|
||||||
return this.content
|
* @param {import('../file').FileGetContentOptions} [opts]
|
||||||
|
*/
|
||||||
|
getContent(opts = {}) {
|
||||||
|
let content = ''
|
||||||
|
let cursor = 0
|
||||||
|
if (opts.filterTrackedDeletes) {
|
||||||
|
for (const tc of this.trackedChanges.trackedChanges) {
|
||||||
|
if (tc.tracking.type !== 'delete') {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (cursor < tc.range.start) {
|
||||||
|
content += this.content.slice(cursor, tc.range.start)
|
||||||
|
}
|
||||||
|
// skip the tracked change
|
||||||
|
cursor = tc.range.end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cursor < this.content.length) {
|
||||||
|
content += this.content.slice(cursor)
|
||||||
|
}
|
||||||
|
return content
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @inheritdoc */
|
/** @inheritdoc */
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Blob from './blob'
|
import Blob from './blob'
|
||||||
|
|
||||||
export type BlobStore = {
|
export type BlobStore = {
|
||||||
getBlob(hash: string): Promise<Blob>
|
getBlob(hash: string): Promise<Blob | null>
|
||||||
getString(hash: string): Promise<string>
|
getString(hash: string): Promise<string>
|
||||||
putString(content: string): Promise<Blob>
|
putString(content: string): Promise<Blob>
|
||||||
putObject(obj: object): Promise<Blob>
|
putObject(obj: object): Promise<Blob>
|
||||||
|
|
|
@ -101,4 +101,68 @@ describe('StringFileData', function () {
|
||||||
{ id: 'comm2', ranges: [{ pos: 20, length: 5 }], resolved: true },
|
{ id: 'comm2', ranges: [{ pos: 20, length: 5 }], resolved: true },
|
||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('getContent should filter out tracked deletions when passed option', function () {
|
||||||
|
const fileData = new StringFileData(
|
||||||
|
'the quick brown fox jumps over the lazy dog',
|
||||||
|
undefined,
|
||||||
|
[
|
||||||
|
{
|
||||||
|
range: { pos: 4, length: 6 },
|
||||||
|
tracking: {
|
||||||
|
type: 'delete',
|
||||||
|
ts: '2024-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
range: { pos: 35, length: 5 },
|
||||||
|
tracking: {
|
||||||
|
type: 'delete',
|
||||||
|
ts: '2023-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(fileData.getContent()).to.equal(
|
||||||
|
'the quick brown fox jumps over the lazy dog'
|
||||||
|
)
|
||||||
|
expect(fileData.getContent({ filterTrackedDeletes: true })).to.equal(
|
||||||
|
'the brown fox jumps over the dog'
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('getContent should keep tracked insertions when passed option to remove tracked changes', function () {
|
||||||
|
const fileData = new StringFileData(
|
||||||
|
'the quick brown fox jumps over the lazy dog',
|
||||||
|
undefined,
|
||||||
|
[
|
||||||
|
{
|
||||||
|
range: { pos: 4, length: 6 },
|
||||||
|
tracking: {
|
||||||
|
type: 'insert',
|
||||||
|
ts: '2024-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
range: { pos: 35, length: 5 },
|
||||||
|
tracking: {
|
||||||
|
type: 'delete',
|
||||||
|
ts: '2023-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(fileData.getContent()).to.equal(
|
||||||
|
'the quick brown fox jumps over the lazy dog'
|
||||||
|
)
|
||||||
|
expect(fileData.getContent({ filterTrackedDeletes: true })).to.equal(
|
||||||
|
'the quick brown fox jumps over the dog'
|
||||||
|
)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
|
@ -22,6 +22,8 @@ const postgresBackend = require('./postgres')
|
||||||
const mongoBackend = require('./mongo')
|
const mongoBackend = require('./mongo')
|
||||||
const logger = require('@overleaf/logger')
|
const logger = require('@overleaf/logger')
|
||||||
|
|
||||||
|
/** @typedef {import('stream').Readable} Readable */
|
||||||
|
|
||||||
const GLOBAL_BLOBS = new Map()
|
const GLOBAL_BLOBS = new Map()
|
||||||
|
|
||||||
function makeGlobalKey(hash) {
|
function makeGlobalKey(hash) {
|
||||||
|
@ -273,7 +275,7 @@ class BlobStore {
|
||||||
* failure, so the caller must be prepared to retry on errors, if appropriate.
|
* failure, so the caller must be prepared to retry on errors, if appropriate.
|
||||||
*
|
*
|
||||||
* @param {string} hash hexadecimal SHA-1 hash
|
* @param {string} hash hexadecimal SHA-1 hash
|
||||||
* @return {stream} a stream to read the file
|
* @return {Promise.<Readable>} a stream to read the file
|
||||||
*/
|
*/
|
||||||
async getStream(hash) {
|
async getStream(hash) {
|
||||||
assert.blobHash(hash, 'bad hash')
|
assert.blobHash(hash, 'bad hash')
|
||||||
|
|
|
@ -1,16 +1,24 @@
|
||||||
|
// @ts-check
|
||||||
'use strict'
|
'use strict'
|
||||||
|
|
||||||
|
/** @typedef {import('overleaf-editor-core/types').Snapshot} Snapshot */
|
||||||
|
|
||||||
const Archive = require('archiver')
|
const Archive = require('archiver')
|
||||||
const BPromise = require('bluebird')
|
const BPromise = require('bluebird')
|
||||||
const fs = require('fs')
|
const fs = require('fs')
|
||||||
const { pipeline } = require('stream')
|
const { pipeline } = require('stream')
|
||||||
|
|
||||||
const core = require('overleaf-editor-core')
|
const core = require('overleaf-editor-core')
|
||||||
|
|
||||||
const Snapshot = core.Snapshot
|
const Snapshot = core.Snapshot
|
||||||
const OError = require('@overleaf/o-error')
|
const OError = require('@overleaf/o-error')
|
||||||
|
|
||||||
const assert = require('./assert')
|
const assert = require('./assert')
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {import('../../storage/lib/blob_store/index').BlobStore} BlobStore
|
||||||
|
*/
|
||||||
|
|
||||||
// The maximum safe concurrency appears to be 1.
|
// The maximum safe concurrency appears to be 1.
|
||||||
// https://github.com/overleaf/issues/issues/1909
|
// https://github.com/overleaf/issues/issues/1909
|
||||||
const FETCH_CONCURRENCY = 1 // number of files to fetch at once
|
const FETCH_CONCURRENCY = 1 // number of files to fetch at once
|
||||||
|
@ -21,101 +29,113 @@ class DownloadError extends OError {
|
||||||
super(`ProjectArchive: blob download failed: ${hash}`, { hash })
|
super(`ProjectArchive: blob download failed: ${hash}`, { hash })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ProjectArchive.DownloadError = DownloadError
|
|
||||||
|
|
||||||
class ArchiveTimeout extends OError {
|
class ArchiveTimeout extends OError {
|
||||||
constructor() {
|
constructor() {
|
||||||
super('ProjectArchive timed out')
|
super('ProjectArchive timed out')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ProjectArchive.ArchiveTimeout = ArchiveTimeout
|
|
||||||
|
|
||||||
/**
|
class MissingfileError extends OError {
|
||||||
* @constructor
|
constructor() {
|
||||||
* @param {Snapshot} snapshot
|
super('ProjectArchive: attempting to look up a file that does not exist')
|
||||||
* @param {?number} timeout in ms
|
}
|
||||||
* @classdesc
|
|
||||||
* Writes the project snapshot to a zip file.
|
|
||||||
*/
|
|
||||||
function ProjectArchive(snapshot, timeout) {
|
|
||||||
assert.instance(snapshot, Snapshot)
|
|
||||||
this.snapshot = snapshot
|
|
||||||
this.timeout = timeout || DEFAULT_ZIP_TIMEOUT
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
class ProjectArchive {
|
||||||
* Write zip archive to the given file path.
|
static ArchiveTimeout = ArchiveTimeout
|
||||||
*
|
static MissingfileError = MissingfileError
|
||||||
* @param {BlobStore} blobStore
|
static DownloadError = DownloadError
|
||||||
* @param {string} zipFilePath
|
|
||||||
*/
|
|
||||||
ProjectArchive.prototype.writeZip = function projectArchiveToZip(
|
|
||||||
blobStore,
|
|
||||||
zipFilePath
|
|
||||||
) {
|
|
||||||
const snapshot = this.snapshot
|
|
||||||
const timeout = this.timeout
|
|
||||||
|
|
||||||
const startTime = process.hrtime()
|
/**
|
||||||
const archive = new Archive('zip')
|
* @constructor
|
||||||
|
* @param {Snapshot} snapshot
|
||||||
// Convert elapsed seconds and nanoseconds to milliseconds.
|
* @param {?number} timeout in ms
|
||||||
function findElapsedMilliseconds() {
|
* @classdesc
|
||||||
const elapsed = process.hrtime(startTime)
|
* Writes the project snapshot to a zip file.
|
||||||
return elapsed[0] * 1e3 + elapsed[1] * 1e-6
|
*/
|
||||||
|
constructor(snapshot, timeout) {
|
||||||
|
assert.instance(snapshot, Snapshot)
|
||||||
|
this.snapshot = snapshot
|
||||||
|
this.timeout = timeout || DEFAULT_ZIP_TIMEOUT
|
||||||
}
|
}
|
||||||
|
|
||||||
function addFileToArchive(pathname) {
|
/**
|
||||||
if (findElapsedMilliseconds() > timeout) {
|
* Write zip archive to the given file path.
|
||||||
throw new ProjectArchive.ArchiveTimeout()
|
*
|
||||||
|
* @param {BlobStore} blobStore
|
||||||
|
* @param {string} zipFilePath
|
||||||
|
*/
|
||||||
|
writeZip(blobStore, zipFilePath) {
|
||||||
|
const snapshot = this.snapshot
|
||||||
|
const timeout = this.timeout
|
||||||
|
|
||||||
|
const startTime = process.hrtime()
|
||||||
|
const archive = new Archive('zip')
|
||||||
|
|
||||||
|
// Convert elapsed seconds and nanoseconds to milliseconds.
|
||||||
|
function findElapsedMilliseconds() {
|
||||||
|
const elapsed = process.hrtime(startTime)
|
||||||
|
return elapsed[0] * 1e3 + elapsed[1] * 1e-6
|
||||||
}
|
}
|
||||||
|
|
||||||
const file = snapshot.getFile(pathname)
|
function addFileToArchive(pathname) {
|
||||||
return file.load('eager', blobStore).then(function () {
|
if (findElapsedMilliseconds() > timeout) {
|
||||||
const content = file.getContent()
|
throw new ProjectArchive.ArchiveTimeout()
|
||||||
if (content === null) {
|
|
||||||
return streamFileToArchive(pathname, file).catch(function (err) {
|
|
||||||
throw new ProjectArchive.DownloadError(file.getHash()).withCause(err)
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
archive.append(content, { name: pathname })
|
|
||||||
}
|
}
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
function streamFileToArchive(pathname, file) {
|
const file = snapshot.getFile(pathname)
|
||||||
return new BPromise(function (resolve, reject) {
|
if (!file) {
|
||||||
blobStore
|
throw new ProjectArchive.MissingfileError()
|
||||||
.getStream(file.getHash())
|
|
||||||
.then(stream => {
|
|
||||||
stream.on('error', reject)
|
|
||||||
stream.on('end', resolve)
|
|
||||||
archive.append(stream, { name: pathname })
|
|
||||||
})
|
|
||||||
.catch(reject)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const addFilesToArchiveAndFinalize = BPromise.map(
|
|
||||||
snapshot.getFilePathnames(),
|
|
||||||
addFileToArchive,
|
|
||||||
{ concurrency: FETCH_CONCURRENCY }
|
|
||||||
).then(function () {
|
|
||||||
archive.finalize()
|
|
||||||
})
|
|
||||||
|
|
||||||
const streamArchiveToFile = new BPromise(function (resolve, reject) {
|
|
||||||
const stream = fs.createWriteStream(zipFilePath)
|
|
||||||
pipeline(archive, stream, function (err) {
|
|
||||||
if (err) {
|
|
||||||
reject(err)
|
|
||||||
} else {
|
|
||||||
resolve()
|
|
||||||
}
|
}
|
||||||
})
|
return file.load('eager', blobStore).then(function () {
|
||||||
})
|
const content = file.getContent({ filterTrackedDeletes: true })
|
||||||
|
if (content === null) {
|
||||||
|
return streamFileToArchive(pathname, file).catch(function (err) {
|
||||||
|
throw new ProjectArchive.DownloadError(file.getHash()).withCause(
|
||||||
|
err
|
||||||
|
)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
archive.append(content, { name: pathname })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
return BPromise.join(streamArchiveToFile, addFilesToArchiveAndFinalize)
|
function streamFileToArchive(pathname, file) {
|
||||||
|
return new BPromise(function (resolve, reject) {
|
||||||
|
blobStore
|
||||||
|
.getStream(file.getHash())
|
||||||
|
.then(stream => {
|
||||||
|
stream.on('error', reject)
|
||||||
|
stream.on('end', resolve)
|
||||||
|
archive.append(stream, { name: pathname })
|
||||||
|
})
|
||||||
|
.catch(reject)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
const addFilesToArchiveAndFinalize = BPromise.map(
|
||||||
|
snapshot.getFilePathnames(),
|
||||||
|
addFileToArchive,
|
||||||
|
{ concurrency: FETCH_CONCURRENCY }
|
||||||
|
).then(function () {
|
||||||
|
archive.finalize()
|
||||||
|
})
|
||||||
|
|
||||||
|
const streamArchiveToFile = new BPromise(function (resolve, reject) {
|
||||||
|
const stream = fs.createWriteStream(zipFilePath)
|
||||||
|
pipeline(archive, stream, function (err) {
|
||||||
|
if (err) {
|
||||||
|
reject(err)
|
||||||
|
} else {
|
||||||
|
resolve()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
return BPromise.join(streamArchiveToFile, addFilesToArchiveAndFinalize)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = ProjectArchive
|
module.exports = ProjectArchive
|
||||||
|
|
|
@ -34,7 +34,7 @@ class ZipStore {
|
||||||
/**
|
/**
|
||||||
* Generate signed link to access the zip file.
|
* Generate signed link to access the zip file.
|
||||||
*
|
*
|
||||||
* @param {number} projectId
|
* @param {number | string} projectId
|
||||||
* @param {number} version
|
* @param {number} version
|
||||||
* @return {string}
|
* @return {string}
|
||||||
*/
|
*/
|
||||||
|
@ -49,7 +49,7 @@ class ZipStore {
|
||||||
/**
|
/**
|
||||||
* Generate a zip of the given snapshot.
|
* Generate a zip of the given snapshot.
|
||||||
*
|
*
|
||||||
* @param {number} projectId
|
* @param {number | string} projectId
|
||||||
* @param {number} version
|
* @param {number} version
|
||||||
* @param {Snapshot} snapshot
|
* @param {Snapshot} snapshot
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -53,4 +53,49 @@ describe('zipStore', function () {
|
||||||
expect(entries.length).to.equal(1)
|
expect(entries.length).to.equal(1)
|
||||||
expect(entries[0].fileName).to.equal('hello.txt')
|
expect(entries[0].fileName).to.equal('hello.txt')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('filters out tracked deletes', async function () {
|
||||||
|
const projectId = fixtures.docs.uninitializedProject.id
|
||||||
|
const version = 1
|
||||||
|
const testSnapshot = new Snapshot()
|
||||||
|
testSnapshot.addFile(
|
||||||
|
'test.tex',
|
||||||
|
File.fromRaw({
|
||||||
|
content: 'the quick brown fox jumps over the lazy dog',
|
||||||
|
trackedChanges: [
|
||||||
|
{
|
||||||
|
range: { pos: 4, length: 6 },
|
||||||
|
tracking: {
|
||||||
|
type: 'delete',
|
||||||
|
ts: '2024-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
range: { pos: 35, length: 5 },
|
||||||
|
tracking: {
|
||||||
|
type: 'delete',
|
||||||
|
ts: '2023-01-01T00:00:00.000Z',
|
||||||
|
userId: 'user2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
const zipUrl = await zipStore.getSignedUrl(projectId, version)
|
||||||
|
// Build the zip file.
|
||||||
|
await zipStore.storeZip(projectId, version, testSnapshot)
|
||||||
|
// Now we should be able to fetch it.
|
||||||
|
const postZipResponse = await fetch(zipUrl)
|
||||||
|
expect(postZipResponse.status).to.equal(200)
|
||||||
|
const zipBuffer = await postZipResponse.buffer()
|
||||||
|
await fs.writeFileAsync(zipFilePath, zipBuffer)
|
||||||
|
const entries = await getZipEntries(zipFilePath)
|
||||||
|
expect(entries.length).to.equal(1)
|
||||||
|
expect(entries[0].fileName).to.equal('test.tex')
|
||||||
|
expect(entries[0].uncompressedSize).to.equal(
|
||||||
|
'the brown fox jumps over the dog'.length
|
||||||
|
)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in a new issue