Mirror of https://github.com/overleaf/overleaf.git (last synced 2024-11-21 20:47:08 -05:00)
Merge pull request #17359 from overleaf/mj-download-filtering

[overleaf-editor-core+history-v1] Filter tracked deletes when creating zip files

GitOrigin-RevId: 1c19d3cb849150d33e750772399ea81c280fdd57
This commit is contained in:
parent 4ef7bc617b
commit ce19b5f568
10 changed files with 247 additions and 87 deletions
|
@ -14,6 +14,7 @@ const StringFileData = require('./file_data/string_file_data')
|
|||
* @typedef {import("./types").StringFileRawData} StringFileRawData
|
||||
* @typedef {import("./types").CommentRawData} CommentRawData
|
||||
* @typedef {import("./operation/text_operation")} TextOperation
|
||||
* @typedef {{filterTrackedDeletes?: boolean}} FileGetContentOptions
|
||||
*/
|
||||
|
||||
class NotEditableError extends OError {
|
||||
|
@ -133,10 +134,11 @@ class File {
|
|||
* The content of the file, if it is known and if this file has UTF-8 encoded
|
||||
* content.
|
||||
*
|
||||
* @param {FileGetContentOptions} [opts]
|
||||
* @return {string | null | undefined}
|
||||
*/
|
||||
getContent() {
|
||||
return this.data.getContent()
|
||||
getContent(opts = {}) {
|
||||
return this.data.getContent(opts)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -90,6 +90,9 @@ class HashFileData extends FileData {
|
|||
let rangesBlob
|
||||
if (this.rangesHash) {
|
||||
rangesBlob = await blobStore.getBlob(this.rangesHash)
|
||||
if (!rangesBlob) {
|
||||
throw new Error('Failed to look up rangesHash in blobStore')
|
||||
}
|
||||
}
|
||||
if (!blob) throw new Error('blob not found: ' + this.hash)
|
||||
return FileData.createLazyFromBlobs(blob, rangesBlob)
|
||||
|
@ -102,6 +105,9 @@ class HashFileData extends FileData {
|
|||
*/
|
||||
async toHollow(blobStore) {
|
||||
const blob = await blobStore.getBlob(this.hash)
|
||||
if (!blob) {
|
||||
throw new Error('Failed to look up hash in blobStore')
|
||||
}
|
||||
return FileData.createHollow(blob.getByteLength(), blob.getStringLength())
|
||||
}
|
||||
|
||||
|
|
|
@ -90,9 +90,10 @@ class FileData {
|
|||
|
||||
/**
|
||||
* @see File#getContent
|
||||
* @param {import('../file').FileGetContentOptions} [opts]
|
||||
* @return {string | null | undefined}
|
||||
*/
|
||||
getContent() {
|
||||
getContent(opts = {}) {
|
||||
return null
|
||||
}
|
||||
|
||||
|
|
|
@ -66,9 +66,29 @@ class StringFileData extends FileData {
|
|||
return true
|
||||
}
|
||||
|
||||
/** @inheritdoc */
|
||||
getContent() {
|
||||
return this.content
|
||||
/**
|
||||
* @inheritdoc
|
||||
* @param {import('../file').FileGetContentOptions} [opts]
|
||||
*/
|
||||
getContent(opts = {}) {
|
||||
let content = ''
|
||||
let cursor = 0
|
||||
if (opts.filterTrackedDeletes) {
|
||||
for (const tc of this.trackedChanges.trackedChanges) {
|
||||
if (tc.tracking.type !== 'delete') {
|
||||
continue
|
||||
}
|
||||
if (cursor < tc.range.start) {
|
||||
content += this.content.slice(cursor, tc.range.start)
|
||||
}
|
||||
// skip the tracked change
|
||||
cursor = tc.range.end
|
||||
}
|
||||
}
|
||||
if (cursor < this.content.length) {
|
||||
content += this.content.slice(cursor)
|
||||
}
|
||||
return content
|
||||
}
|
||||
|
||||
/** @inheritdoc */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import Blob from './blob'
|
||||
|
||||
export type BlobStore = {
|
||||
getBlob(hash: string): Promise<Blob>
|
||||
getBlob(hash: string): Promise<Blob | null>
|
||||
getString(hash: string): Promise<string>
|
||||
putString(content: string): Promise<Blob>
|
||||
putObject(obj: object): Promise<Blob>
|
||||
|
|
|
@ -101,4 +101,68 @@ describe('StringFileData', function () {
|
|||
{ id: 'comm2', ranges: [{ pos: 20, length: 5 }], resolved: true },
|
||||
])
|
||||
})
|
||||
|
||||
it('getContent should filter out tracked deletions when passed option', function () {
|
||||
const fileData = new StringFileData(
|
||||
'the quick brown fox jumps over the lazy dog',
|
||||
undefined,
|
||||
[
|
||||
{
|
||||
range: { pos: 4, length: 6 },
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
ts: '2024-01-01T00:00:00.000Z',
|
||||
userId: 'user1',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: { pos: 35, length: 5 },
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
ts: '2023-01-01T00:00:00.000Z',
|
||||
userId: 'user2',
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
expect(fileData.getContent()).to.equal(
|
||||
'the quick brown fox jumps over the lazy dog'
|
||||
)
|
||||
expect(fileData.getContent({ filterTrackedDeletes: true })).to.equal(
|
||||
'the brown fox jumps over the dog'
|
||||
)
|
||||
})
|
||||
|
||||
it('getContent should keep tracked insertions when passed option to remove tracked changes', function () {
|
||||
const fileData = new StringFileData(
|
||||
'the quick brown fox jumps over the lazy dog',
|
||||
undefined,
|
||||
[
|
||||
{
|
||||
range: { pos: 4, length: 6 },
|
||||
tracking: {
|
||||
type: 'insert',
|
||||
ts: '2024-01-01T00:00:00.000Z',
|
||||
userId: 'user1',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: { pos: 35, length: 5 },
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
ts: '2023-01-01T00:00:00.000Z',
|
||||
userId: 'user2',
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
expect(fileData.getContent()).to.equal(
|
||||
'the quick brown fox jumps over the lazy dog'
|
||||
)
|
||||
expect(fileData.getContent({ filterTrackedDeletes: true })).to.equal(
|
||||
'the quick brown fox jumps over the dog'
|
||||
)
|
||||
})
|
||||
})
|
||||
|
|
|
@ -22,6 +22,8 @@ const postgresBackend = require('./postgres')
|
|||
const mongoBackend = require('./mongo')
|
||||
const logger = require('@overleaf/logger')
|
||||
|
||||
/** @typedef {import('stream').Readable} Readable */
|
||||
|
||||
const GLOBAL_BLOBS = new Map()
|
||||
|
||||
function makeGlobalKey(hash) {
|
||||
|
@ -273,7 +275,7 @@ class BlobStore {
|
|||
* failure, so the caller must be prepared to retry on errors, if appropriate.
|
||||
*
|
||||
* @param {string} hash hexadecimal SHA-1 hash
|
||||
* @return {stream} a stream to read the file
|
||||
* @return {Promise.<Readable>} a stream to read the file
|
||||
*/
|
||||
async getStream(hash) {
|
||||
assert.blobHash(hash, 'bad hash')
|
||||
|
|
|
@ -1,16 +1,24 @@
|
|||
// @ts-check
|
||||
'use strict'
|
||||
|
||||
/** @typedef {import('overleaf-editor-core/types').Snapshot} Snapshot */
|
||||
|
||||
const Archive = require('archiver')
|
||||
const BPromise = require('bluebird')
|
||||
const fs = require('fs')
|
||||
const { pipeline } = require('stream')
|
||||
|
||||
const core = require('overleaf-editor-core')
|
||||
|
||||
const Snapshot = core.Snapshot
|
||||
const OError = require('@overleaf/o-error')
|
||||
|
||||
const assert = require('./assert')
|
||||
|
||||
/**
|
||||
* @typedef {import('../../storage/lib/blob_store/index').BlobStore} BlobStore
|
||||
*/
|
||||
|
||||
// The maximum safe concurrency appears to be 1.
|
||||
// https://github.com/overleaf/issues/issues/1909
|
||||
const FETCH_CONCURRENCY = 1 // number of files to fetch at once
|
||||
|
@ -21,101 +29,113 @@ class DownloadError extends OError {
|
|||
super(`ProjectArchive: blob download failed: ${hash}`, { hash })
|
||||
}
|
||||
}
|
||||
ProjectArchive.DownloadError = DownloadError
|
||||
|
||||
class ArchiveTimeout extends OError {
|
||||
constructor() {
|
||||
super('ProjectArchive timed out')
|
||||
}
|
||||
}
|
||||
ProjectArchive.ArchiveTimeout = ArchiveTimeout
|
||||
|
||||
/**
|
||||
* @constructor
|
||||
* @param {Snapshot} snapshot
|
||||
* @param {?number} timeout in ms
|
||||
* @classdesc
|
||||
* Writes the project snapshot to a zip file.
|
||||
*/
|
||||
function ProjectArchive(snapshot, timeout) {
|
||||
assert.instance(snapshot, Snapshot)
|
||||
this.snapshot = snapshot
|
||||
this.timeout = timeout || DEFAULT_ZIP_TIMEOUT
|
||||
class MissingfileError extends OError {
|
||||
constructor() {
|
||||
super('ProjectArchive: attempting to look up a file that does not exist')
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write zip archive to the given file path.
|
||||
*
|
||||
* @param {BlobStore} blobStore
|
||||
* @param {string} zipFilePath
|
||||
*/
|
||||
ProjectArchive.prototype.writeZip = function projectArchiveToZip(
|
||||
blobStore,
|
||||
zipFilePath
|
||||
) {
|
||||
const snapshot = this.snapshot
|
||||
const timeout = this.timeout
|
||||
class ProjectArchive {
|
||||
static ArchiveTimeout = ArchiveTimeout
|
||||
static MissingfileError = MissingfileError
|
||||
static DownloadError = DownloadError
|
||||
|
||||
const startTime = process.hrtime()
|
||||
const archive = new Archive('zip')
|
||||
|
||||
// Convert elapsed seconds and nanoseconds to milliseconds.
|
||||
function findElapsedMilliseconds() {
|
||||
const elapsed = process.hrtime(startTime)
|
||||
return elapsed[0] * 1e3 + elapsed[1] * 1e-6
|
||||
/**
|
||||
* @constructor
|
||||
* @param {Snapshot} snapshot
|
||||
* @param {?number} timeout in ms
|
||||
* @classdesc
|
||||
* Writes the project snapshot to a zip file.
|
||||
*/
|
||||
constructor(snapshot, timeout) {
|
||||
assert.instance(snapshot, Snapshot)
|
||||
this.snapshot = snapshot
|
||||
this.timeout = timeout || DEFAULT_ZIP_TIMEOUT
|
||||
}
|
||||
|
||||
function addFileToArchive(pathname) {
|
||||
if (findElapsedMilliseconds() > timeout) {
|
||||
throw new ProjectArchive.ArchiveTimeout()
|
||||
/**
|
||||
* Write zip archive to the given file path.
|
||||
*
|
||||
* @param {BlobStore} blobStore
|
||||
* @param {string} zipFilePath
|
||||
*/
|
||||
writeZip(blobStore, zipFilePath) {
|
||||
const snapshot = this.snapshot
|
||||
const timeout = this.timeout
|
||||
|
||||
const startTime = process.hrtime()
|
||||
const archive = new Archive('zip')
|
||||
|
||||
// Convert elapsed seconds and nanoseconds to milliseconds.
|
||||
function findElapsedMilliseconds() {
|
||||
const elapsed = process.hrtime(startTime)
|
||||
return elapsed[0] * 1e3 + elapsed[1] * 1e-6
|
||||
}
|
||||
|
||||
const file = snapshot.getFile(pathname)
|
||||
return file.load('eager', blobStore).then(function () {
|
||||
const content = file.getContent()
|
||||
if (content === null) {
|
||||
return streamFileToArchive(pathname, file).catch(function (err) {
|
||||
throw new ProjectArchive.DownloadError(file.getHash()).withCause(err)
|
||||
})
|
||||
} else {
|
||||
archive.append(content, { name: pathname })
|
||||
function addFileToArchive(pathname) {
|
||||
if (findElapsedMilliseconds() > timeout) {
|
||||
throw new ProjectArchive.ArchiveTimeout()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
function streamFileToArchive(pathname, file) {
|
||||
return new BPromise(function (resolve, reject) {
|
||||
blobStore
|
||||
.getStream(file.getHash())
|
||||
.then(stream => {
|
||||
stream.on('error', reject)
|
||||
stream.on('end', resolve)
|
||||
archive.append(stream, { name: pathname })
|
||||
})
|
||||
.catch(reject)
|
||||
})
|
||||
}
|
||||
|
||||
const addFilesToArchiveAndFinalize = BPromise.map(
|
||||
snapshot.getFilePathnames(),
|
||||
addFileToArchive,
|
||||
{ concurrency: FETCH_CONCURRENCY }
|
||||
).then(function () {
|
||||
archive.finalize()
|
||||
})
|
||||
|
||||
const streamArchiveToFile = new BPromise(function (resolve, reject) {
|
||||
const stream = fs.createWriteStream(zipFilePath)
|
||||
pipeline(archive, stream, function (err) {
|
||||
if (err) {
|
||||
reject(err)
|
||||
} else {
|
||||
resolve()
|
||||
const file = snapshot.getFile(pathname)
|
||||
if (!file) {
|
||||
throw new ProjectArchive.MissingfileError()
|
||||
}
|
||||
})
|
||||
})
|
||||
return file.load('eager', blobStore).then(function () {
|
||||
const content = file.getContent({ filterTrackedDeletes: true })
|
||||
if (content === null) {
|
||||
return streamFileToArchive(pathname, file).catch(function (err) {
|
||||
throw new ProjectArchive.DownloadError(file.getHash()).withCause(
|
||||
err
|
||||
)
|
||||
})
|
||||
} else {
|
||||
archive.append(content, { name: pathname })
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return BPromise.join(streamArchiveToFile, addFilesToArchiveAndFinalize)
|
||||
function streamFileToArchive(pathname, file) {
|
||||
return new BPromise(function (resolve, reject) {
|
||||
blobStore
|
||||
.getStream(file.getHash())
|
||||
.then(stream => {
|
||||
stream.on('error', reject)
|
||||
stream.on('end', resolve)
|
||||
archive.append(stream, { name: pathname })
|
||||
})
|
||||
.catch(reject)
|
||||
})
|
||||
}
|
||||
|
||||
const addFilesToArchiveAndFinalize = BPromise.map(
|
||||
snapshot.getFilePathnames(),
|
||||
addFileToArchive,
|
||||
{ concurrency: FETCH_CONCURRENCY }
|
||||
).then(function () {
|
||||
archive.finalize()
|
||||
})
|
||||
|
||||
const streamArchiveToFile = new BPromise(function (resolve, reject) {
|
||||
const stream = fs.createWriteStream(zipFilePath)
|
||||
pipeline(archive, stream, function (err) {
|
||||
if (err) {
|
||||
reject(err)
|
||||
} else {
|
||||
resolve()
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
return BPromise.join(streamArchiveToFile, addFilesToArchiveAndFinalize)
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProjectArchive
|
||||
|
|
|
@ -34,7 +34,7 @@ class ZipStore {
|
|||
/**
|
||||
* Generate signed link to access the zip file.
|
||||
*
|
||||
* @param {number} projectId
|
||||
* @param {number | string} projectId
|
||||
* @param {number} version
|
||||
* @return {string}
|
||||
*/
|
||||
|
@ -49,7 +49,7 @@ class ZipStore {
|
|||
/**
|
||||
* Generate a zip of the given snapshot.
|
||||
*
|
||||
* @param {number} projectId
|
||||
* @param {number | string} projectId
|
||||
* @param {number} version
|
||||
* @param {Snapshot} snapshot
|
||||
*/
|
||||
|
|
|
@ -53,4 +53,49 @@ describe('zipStore', function () {
|
|||
expect(entries.length).to.equal(1)
|
||||
expect(entries[0].fileName).to.equal('hello.txt')
|
||||
})
|
||||
|
||||
it('filters out tracked deletes', async function () {
|
||||
const projectId = fixtures.docs.uninitializedProject.id
|
||||
const version = 1
|
||||
const testSnapshot = new Snapshot()
|
||||
testSnapshot.addFile(
|
||||
'test.tex',
|
||||
File.fromRaw({
|
||||
content: 'the quick brown fox jumps over the lazy dog',
|
||||
trackedChanges: [
|
||||
{
|
||||
range: { pos: 4, length: 6 },
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
ts: '2024-01-01T00:00:00.000Z',
|
||||
userId: 'user1',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: { pos: 35, length: 5 },
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
ts: '2023-01-01T00:00:00.000Z',
|
||||
userId: 'user2',
|
||||
},
|
||||
},
|
||||
],
|
||||
})
|
||||
)
|
||||
|
||||
const zipUrl = await zipStore.getSignedUrl(projectId, version)
|
||||
// Build the zip file.
|
||||
await zipStore.storeZip(projectId, version, testSnapshot)
|
||||
// Now we should be able to fetch it.
|
||||
const postZipResponse = await fetch(zipUrl)
|
||||
expect(postZipResponse.status).to.equal(200)
|
||||
const zipBuffer = await postZipResponse.buffer()
|
||||
await fs.writeFileAsync(zipFilePath, zipBuffer)
|
||||
const entries = await getZipEntries(zipFilePath)
|
||||
expect(entries.length).to.equal(1)
|
||||
expect(entries[0].fileName).to.equal('test.tex')
|
||||
expect(entries[0].uncompressedSize).to.equal(
|
||||
'the brown fox jumps over the dog'.length
|
||||
)
|
||||
})
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue