From b80d0e863444112c517ad14e02c7cea77081bb11 Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Mon, 26 Feb 2024 13:51:52 +0000 Subject: [PATCH] Merge pull request #17047 from overleaf/mj-ranges-in-blobstore [overleaf-editor-core] Store tracked changes and comments in blob-store GitOrigin-RevId: 05cc69044bf26808b64b2687d81f2c40ef4cc972 --- libraries/overleaf-editor-core/lib/file.js | 20 ++- .../lib/file_data/hash_file_data.js | 84 +++++++++++-- .../lib/file_data/index.js | 22 +++- .../lib/file_data/lazy_string_file_data.js | 39 +++++- .../lib/file_data/string_file_data.js | 10 ++ .../overleaf-editor-core/lib/snapshot.js | 2 + libraries/overleaf-editor-core/lib/types.ts | 8 ++ libraries/overleaf-editor-core/package.json | 3 +- .../overleaf-editor-core/test/file.test.js | 8 +- .../test/lazy_string_file_data.test.js | 118 ++++++++++++++++-- package-lock.json | 2 + .../storage/lib/blob_store/index.js | 51 +++++++- services/history-v1/storage/lib/streams.js | 46 +++---- 13 files changed, 351 insertions(+), 62 deletions(-) diff --git a/libraries/overleaf-editor-core/lib/file.js b/libraries/overleaf-editor-core/lib/file.js index c3849728b4..7ad4d97663 100644 --- a/libraries/overleaf-editor-core/lib/file.js +++ b/libraries/overleaf-editor-core/lib/file.js @@ -67,11 +67,12 @@ class File { /** * @param {string} hash + * @param {string} [rangesHash] * @param {Object} [metadata] * @return {File} */ - static fromHash(hash, metadata) { - return new File(new HashFileData(hash), metadata) + static fromHash(hash, rangesHash, metadata) { + return new File(new HashFileData(hash, rangesHash), metadata) } /** @@ -95,11 +96,12 @@ class File { /** * @param {Blob} blob + * @param {Blob} [rangesBlob] * @param {Object} [metadata] * @return {File} */ - static createLazyFromBlob(blob, metadata) { - return new File(FileData.createLazyFromBlob(blob), metadata) + static createLazyFromBlobs(blob, rangesBlob, metadata) { + return new File(FileData.createLazyFromBlobs(blob, rangesBlob), metadata) } 
toRaw() { @@ -117,6 +119,16 @@ class File { return this.data.getHash() } + /** + * Hexadecimal SHA-1 hash of the ranges content (comments + tracked changes), + * if known. + * + * @return {string | null | undefined} + */ + getRangesHash() { + return this.data.getRangesHash() + } + /** * The content of the file, if it is known and if this file has UTF-8 encoded * content. diff --git a/libraries/overleaf-editor-core/lib/file_data/hash_file_data.js b/libraries/overleaf-editor-core/lib/file_data/hash_file_data.js index a15a35c232..0105e7cc19 100644 --- a/libraries/overleaf-editor-core/lib/file_data/hash_file_data.js +++ b/libraries/overleaf-editor-core/lib/file_data/hash_file_data.js @@ -1,58 +1,120 @@ +// @ts-check 'use strict' const assert = require('check-types').assert const Blob = require('../blob') const FileData = require('./') +/** + * @typedef {import('./string_file_data')} StringFileData + * @typedef {import('./lazy_string_file_data')} LazyStringFileData + * @typedef {import('./hollow_string_file_data')} HollowStringFileData + * @typedef {import('../types').BlobStore} BlobStore + */ class HashFileData extends FileData { /** * @constructor * @param {string} hash + * @param {string} [rangesHash] * @see FileData */ - constructor(hash) { + constructor(hash, rangesHash) { super() assert.match(hash, Blob.HEX_HASH_RX, 'HashFileData: bad hash') + if (rangesHash) { + assert.match( + rangesHash, + Blob.HEX_HASH_RX, + 'HashFileData: bad ranges hash' + ) + } this.hash = hash + this.rangesHash = rangesHash } + /** + * + * @param {{hash: string, rangesHash?: string}} raw + * @returns + */ static fromRaw(raw) { - return new HashFileData(raw.hash) + return new HashFileData(raw.hash, raw.rangesHash) } - /** @inheritdoc */ + /** + * @inheritdoc + * @returns {{hash: string, rangesHash?: string}} + */ toRaw() { - return { hash: this.hash } + const raw = { hash: this.hash } + if (this.rangesHash) { + raw.rangesHash = this.rangesHash + } + return raw } - /** @inheritdoc */ + /** + 
* @inheritdoc + * @returns {string} + */ getHash() { return this.hash } - /** @inheritdoc */ + /** + * @inheritdoc + * @returns {string | undefined} + */ + getRangesHash() { + return this.rangesHash + } + + /** + * @inheritdoc + * @param {BlobStore} blobStore + * @returns {Promise} + */ async toEager(blobStore) { const lazyFileData = await this.toLazy(blobStore) return await lazyFileData.toEager(blobStore) } - /** @inheritdoc */ + /** + * @inheritdoc + * @param {BlobStore} blobStore + * @returns {Promise} + */ async toLazy(blobStore) { const blob = await blobStore.getBlob(this.hash) + let rangesBlob + if (this.rangesHash) { + rangesBlob = await blobStore.getBlob(this.rangesHash) + } if (!blob) throw new Error('blob not found: ' + this.hash) - return FileData.createLazyFromBlob(blob) + return FileData.createLazyFromBlobs(blob, rangesBlob) } - /** @inheritdoc */ + /** + * @inheritdoc + * @param {BlobStore} blobStore + * @returns {Promise} + */ async toHollow(blobStore) { const blob = await blobStore.getBlob(this.hash) return FileData.createHollow(blob.getByteLength(), blob.getStringLength()) } - /** @inheritdoc */ + /** + * @inheritdoc + * @returns {Promise<{hash: string, rangesHash?: string}>} + */ async store() { - return { hash: this.hash } + const raw = { hash: this.hash } + if (this.rangesHash) { + raw.rangesHash = this.rangesHash + } + return raw } } diff --git a/libraries/overleaf-editor-core/lib/file_data/index.js b/libraries/overleaf-editor-core/lib/file_data/index.js index e6ad3c7304..fc1adca3e9 100644 --- a/libraries/overleaf-editor-core/lib/file_data/index.js +++ b/libraries/overleaf-editor-core/lib/file_data/index.js @@ -50,13 +50,21 @@ class FileData { return new HollowStringFileData(stringLength) } - /** @see File.createLazyFromBlob */ - static createLazyFromBlob(blob) { + /** + * @see File.createLazyFromBlobs + * @param {Blob} blob + * @param {Blob} [rangesBlob] + */ + static createLazyFromBlobs(blob, rangesBlob) { assert.instance(blob, Blob, 
'FileData: bad blob') if (blob.getStringLength() == null) { return new BinaryFileData(blob.getHash(), blob.getByteLength()) } - return new LazyStringFileData(blob.getHash(), blob.getStringLength()) + return new LazyStringFileData( + blob.getHash(), + rangesBlob?.getHash(), + blob.getStringLength() + ) } toRaw() { @@ -72,6 +80,14 @@ class FileData { return null } + /** + * @see File#getRangesHash + * @return {string | null | undefined} + */ + getRangesHash() { + return null + } + /** * @see File#getContent * @return {string | null | undefined} diff --git a/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js b/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js index d8e0de94ae..74687169c7 100644 --- a/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js +++ b/libraries/overleaf-editor-core/lib/file_data/lazy_string_file_data.js @@ -10,20 +10,30 @@ const EagerStringFileData = require('./string_file_data') const EditOperation = require('../operation/edit_operation') const EditOperationBuilder = require('../operation/edit_operation_builder') +/** + * @typedef {import('../types').BlobStore} BlobStore + * @typedef {import('../types').RangesBlob} RangesBlob + */ + class LazyStringFileData extends FileData { /** * @param {string} hash + * @param {string | undefined} rangesHash * @param {number} stringLength * @param {Array.} [operations] * @see FileData */ - constructor(hash, stringLength, operations) { + constructor(hash, rangesHash, stringLength, operations) { super() assert.match(hash, Blob.HEX_HASH_RX) + if (rangesHash) { + assert.match(rangesHash, Blob.HEX_HASH_RX) + } assert.greaterOrEqual(stringLength, 0) assert.maybe.array.of.instance(operations, EditOperation) this.hash = hash + this.rangesHash = rangesHash this.stringLength = stringLength this.operations = operations || [] } @@ -31,6 +41,7 @@ class LazyStringFileData extends FileData { static fromRaw(raw) { return new LazyStringFileData( raw.hash, + raw.rangesHash, 
raw.stringLength, raw.operations && _.map(raw.operations, EditOperationBuilder.fromJSON) ) @@ -39,6 +50,9 @@ class LazyStringFileData extends FileData { /** @inheritdoc */ toRaw() { const raw = { hash: this.hash, stringLength: this.stringLength } + if (this.rangesHash) { + raw.rangesHash = this.rangesHash + } if (this.operations.length) { raw.operations = _.map(this.operations, function (operation) { return operation.toJSON() @@ -53,6 +67,12 @@ class LazyStringFileData extends FileData { return this.hash } + /** @inheritdoc */ + getRangesHash() { + if (this.operations.length) return null + return this.rangesHash + } + /** @inheritdoc */ isEditable() { return true @@ -86,11 +106,20 @@ class LazyStringFileData extends FileData { /** * @inheritdoc + * @param {BlobStore} blobStore * @returns {Promise} */ async toEager(blobStore) { const content = await blobStore.getString(this.hash) - const file = new EagerStringFileData(content) + let comments + let trackedChanges + if (this.rangesHash) { + /** @type {RangesBlob} */ + const ranges = await blobStore.getObject(this.rangesHash) + comments = ranges.comments + trackedChanges = ranges.trackedChanges + } + const file = new EagerStringFileData(content, comments, trackedChanges) applyOperations(this.operations, file) return file } @@ -114,7 +143,11 @@ class LazyStringFileData extends FileData { /** @inheritdoc */ async store(blobStore) { if (this.operations.length === 0) { - return { hash: this.hash } + const raw = { hash: this.hash } + if (this.rangesHash) { + raw.rangesHash = this.rangesHash + } + return raw } const eager = await this.toEager(blobStore) this.operations.length = 0 diff --git a/libraries/overleaf-editor-core/lib/file_data/string_file_data.js b/libraries/overleaf-editor-core/lib/file_data/string_file_data.js index ebba0a685c..d9b69885fb 100644 --- a/libraries/overleaf-editor-core/lib/file_data/string_file_data.js +++ b/libraries/overleaf-editor-core/lib/file_data/string_file_data.js @@ -13,6 +13,7 @@ const 
TrackedChangeList = require('./tracked_change_list') * @typedef {import("../types").BlobStore} BlobStore * @typedef {import("../types").CommentsListRawData} CommentsListRawData * @typedef {import("../types").TrackedChangeRawData} TrackedChangeRawData + * @typedef {import('../types').RangesBlob} RangesBlob */ class StringFileData extends FileData { @@ -111,6 +112,15 @@ class StringFileData extends FileData { */ async store(blobStore) { const blob = await blobStore.putString(this.content) + if (this.comments.comments.size || this.trackedChanges.length) { + /** @type {RangesBlob} */ + const ranges = { + comments: this.getComments(), + trackedChanges: this.trackedChanges.toRaw(), + } + const rangesBlob = await blobStore.putObject(ranges) + return { hash: blob.getHash(), rangesHash: rangesBlob.getHash() } + } return { hash: blob.getHash() } } } diff --git a/libraries/overleaf-editor-core/lib/snapshot.js b/libraries/overleaf-editor-core/lib/snapshot.js index b64b91c708..46b9b71c62 100644 --- a/libraries/overleaf-editor-core/lib/snapshot.js +++ b/libraries/overleaf-editor-core/lib/snapshot.js @@ -186,7 +186,9 @@ class Snapshot { // eslint-disable-next-line array-callback-return this.fileMap.map(file => { const hash = file.getHash() + const rangeHash = file.getRangesHash() if (hash) blobHashes.add(hash) + if (rangeHash) blobHashes.add(rangeHash) }) } diff --git a/libraries/overleaf-editor-core/lib/types.ts b/libraries/overleaf-editor-core/lib/types.ts index 5e1b97988e..a537828b75 100644 --- a/libraries/overleaf-editor-core/lib/types.ts +++ b/libraries/overleaf-editor-core/lib/types.ts @@ -1,8 +1,16 @@ import Blob from './blob' export type BlobStore = { + getBlob(hash: string): Promise getString(hash: string): Promise putString(content: string): Promise + putObject(obj: object): Promise + getObject(hash: string): Promise +} + +export type RangesBlob = { + comments: CommentsListRawData + trackedChanges: TrackedChangeRawData[] } type Range = { diff --git 
a/libraries/overleaf-editor-core/package.json b/libraries/overleaf-editor-core/package.json index 17f19bfb98..a1eca0588d 100644 --- a/libraries/overleaf-editor-core/package.json +++ b/libraries/overleaf-editor-core/package.json @@ -22,7 +22,8 @@ "devDependencies": { "chai": "^3.3.0", "mocha": "^10.2.0", - "typescript": "^5.0.4" + "typescript": "^5.0.4", + "sinon": "^9.2.4" }, "dependencies": { "@overleaf/o-error": "*", diff --git a/libraries/overleaf-editor-core/test/file.test.js b/libraries/overleaf-editor-core/test/file.test.js index 06b11c4f63..e9dab90151 100644 --- a/libraries/overleaf-editor-core/test/file.test.js +++ b/libraries/overleaf-editor-core/test/file.test.js @@ -23,7 +23,7 @@ describe('File', function () { describe('toRaw', function () { it('returns non-empty metadata', function () { const metadata = { main: true } - const file = File.fromHash(File.EMPTY_FILE_HASH, metadata) + const file = File.fromHash(File.EMPTY_FILE_HASH, undefined, metadata) expect(file.toRaw()).to.eql({ hash: File.EMPTY_FILE_HASH, metadata, @@ -35,7 +35,7 @@ describe('File', function () { it('returns a deep clone of metadata', function () { const metadata = { externalFile: { id: 123 } } - const file = File.fromHash(File.EMPTY_FILE_HASH, metadata) + const file = File.fromHash(File.EMPTY_FILE_HASH, undefined, metadata) const raw = file.toRaw() const fileMetadata = file.getMetadata() const rawMetadata = raw.metadata @@ -54,7 +54,7 @@ describe('File', function () { it('returns non-empty metadata', async function () { const metadata = { main: true } - const file = File.fromHash(File.EMPTY_FILE_HASH, metadata) + const file = File.fromHash(File.EMPTY_FILE_HASH, undefined, metadata) const fakeBlobStore = new FakeBlobStore() const raw = await file.store(fakeBlobStore) expect(raw).to.eql({ @@ -65,7 +65,7 @@ describe('File', function () { it('returns a deep clone of metadata', async function () { const metadata = { externalFile: { id: 123 } } - const file = 
File.fromHash(File.EMPTY_FILE_HASH, metadata) + const file = File.fromHash(File.EMPTY_FILE_HASH, undefined, metadata) const fakeBlobStore = new FakeBlobStore() const raw = await file.store(fakeBlobStore) raw.metadata.externalFile.id = 456 diff --git a/libraries/overleaf-editor-core/test/lazy_string_file_data.test.js b/libraries/overleaf-editor-core/test/lazy_string_file_data.test.js index 25ca2c87b5..58f79da868 100644 --- a/libraries/overleaf-editor-core/test/lazy_string_file_data.test.js +++ b/libraries/overleaf-editor-core/test/lazy_string_file_data.test.js @@ -1,20 +1,53 @@ +// @ts-check 'use strict' const _ = require('lodash') const { expect } = require('chai') +const sinon = require('sinon') const ot = require('..') const File = ot.File const TextOperation = ot.TextOperation const LazyStringFileData = require('../lib/file_data/lazy_string_file_data') +const EagerStringFileData = require('../lib/file_data/string_file_data') describe('LazyStringFileData', function () { + beforeEach(function () { + this.rangesHash = '380de212d09bf8498065833dbf242aaf11184316' + this.fileHash = 'a5675307b61ec2517330622a6e649b4ca1ee5612' + this.blobStore = { + getString: sinon.stub(), + putString: sinon.stub().resolves(new ot.Blob(this.fileHash, 19, 19)), + getObject: sinon.stub(), + putObject: sinon.stub().resolves(new ot.Blob(this.rangesHash, 204, 204)), + } + this.blobStore.getString.withArgs(File.EMPTY_FILE_HASH).resolves('') + this.blobStore.getString + .withArgs(this.fileHash) + .resolves('the quick brown fox') + this.blobStore.getObject.withArgs(this.rangesHash).resolves({ + comments: [ + { id: 'foo', ranges: [{ pos: 0, length: 3 }], resolved: false }, + ], + trackedChanges: [ + { + range: { pos: 4, length: 5 }, + tracking: { + type: 'delete', + userId: 'user1', + ts: '2024-01-01T00:00:00.000Z', + }, + }, + ], + }) + }) + it('uses raw text operations for toRaw and fromRaw', function () { const testHash = File.EMPTY_FILE_HASH - const fileData = new LazyStringFileData(testHash, 
0) + const fileData = new LazyStringFileData(testHash, undefined, 0) let roundTripFileData - expect(fileData.toRaw()).to.eql({ + expect(fileData.toRaw()).to.deep.equal({ hash: testHash, stringLength: 0, }) @@ -24,7 +57,7 @@ describe('LazyStringFileData', function () { expect(roundTripFileData.getOperations()).to.have.length(0) fileData.edit(new TextOperation().insert('a')) - expect(fileData.toRaw()).to.eql({ + expect(fileData.toRaw()).to.deep.equal({ hash: testHash, stringLength: 1, operations: [{ textOperation: ['a'] }], @@ -33,10 +66,15 @@ describe('LazyStringFileData', function () { expect(roundTripFileData.getHash()).not.to.exist // file has changed expect(roundTripFileData.getStringLength()).to.equal(1) expect(roundTripFileData.getOperations()).to.have.length(1) - expect(roundTripFileData.getOperations()[0].ops).to.have.length(1) + expect(roundTripFileData.getOperations()[0]).to.be.instanceOf(TextOperation) + expect( + /** @type {InstanceType} */ ( + roundTripFileData.getOperations()[0] + ).ops + ).to.have.length(1) fileData.edit(new TextOperation().retain(1).insert('b')) - expect(fileData.toRaw()).to.eql({ + expect(fileData.toRaw()).to.deep.equal({ hash: testHash, stringLength: 2, operations: [{ textOperation: ['a'] }, { textOperation: [1, 'b'] }], @@ -45,13 +83,75 @@ describe('LazyStringFileData', function () { expect(roundTripFileData.getHash()).not.to.exist // file has changed expect(roundTripFileData.getStringLength()).to.equal(2) expect(roundTripFileData.getOperations()).to.have.length(2) - expect(roundTripFileData.getOperations()[0].ops).to.have.length(1) - expect(roundTripFileData.getOperations()[1].ops).to.have.length(2) + expect( + /** @type {InstanceType} */ ( + roundTripFileData.getOperations()[0] + ).ops + ).to.have.length(1) + expect( + /** @type {InstanceType} */ ( + roundTripFileData.getOperations()[1] + ).ops + ).to.have.length(2) + }) + + it('should include rangesHash in toRaw and fromRaw when available', function () { + const testHash = 
File.EMPTY_FILE_HASH + const rangesHash = this.rangesHash + const fileData = new LazyStringFileData(testHash, rangesHash, 19) + + expect(fileData.toRaw()).to.deep.equal({ + hash: testHash, + rangesHash, + stringLength: 19, + }) + + const roundTripFileData = LazyStringFileData.fromRaw(fileData.toRaw()) + expect(roundTripFileData.getHash()).to.equal(testHash) + expect(roundTripFileData.getRangesHash()).to.equal(rangesHash) + expect(roundTripFileData.getStringLength()).to.equal(19) + expect(roundTripFileData.getOperations()).to.have.length(0) + }) + + it('should fetch content from blob store when loading eager string', async function () { + const testHash = this.fileHash + const rangesHash = this.rangesHash + const fileData = new LazyStringFileData(testHash, rangesHash, 19) + const eagerString = await fileData.toEager(this.blobStore) + expect(eagerString).to.be.instanceOf(EagerStringFileData) + expect(eagerString.getContent()).to.equal('the quick brown fox') + expect(eagerString.getComments()).to.deep.equal([ + { id: 'foo', ranges: [{ pos: 0, length: 3 }], resolved: false }, + ]) + expect(eagerString.trackedChanges.toRaw()).to.deep.equal([ + { + range: { pos: 4, length: 5 }, + tracking: { + type: 'delete', + userId: 'user1', + ts: '2024-01-01T00:00:00.000Z', + }, + }, + ]) + expect(this.blobStore.getObject.calledWith(rangesHash)).to.be.true + expect(this.blobStore.getString.calledWith(testHash)).to.be.true + }) + + it('should not fetch ranges from blob store if not present', async function () { + const testHash = this.fileHash + const fileData = new LazyStringFileData(testHash, undefined, 19) + const eagerString = await fileData.toEager(this.blobStore) + expect(eagerString).to.be.instanceOf(EagerStringFileData) + expect(eagerString.getContent()).to.equal('the quick brown fox') + expect(eagerString.getComments()).to.be.empty + expect(eagerString.trackedChanges.length).to.equal(0) + expect(this.blobStore.getObject.called).to.be.false + 
expect(this.blobStore.getString.calledWith(testHash)).to.be.true }) it('validates operations when edited', function () { const testHash = File.EMPTY_FILE_HASH - const fileData = new LazyStringFileData(testHash, 0) + const fileData = new LazyStringFileData(testHash, undefined, 0) expect(fileData.getHash()).equal(testHash) expect(fileData.getByteLength()).to.equal(0) // approximately expect(fileData.getStringLength()).to.equal(0) @@ -74,7 +174,7 @@ describe('LazyStringFileData', function () { it('validates string length when edited', function () { const testHash = File.EMPTY_FILE_HASH - const fileData = new LazyStringFileData(testHash, 0) + const fileData = new LazyStringFileData(testHash, undefined, 0) expect(fileData.getHash()).equal(testHash) expect(fileData.getByteLength()).to.equal(0) // approximately expect(fileData.getStringLength()).to.equal(0) diff --git a/package-lock.json b/package-lock.json index a1a3924a22..35a98c0544 100644 --- a/package-lock.json +++ b/package-lock.json @@ -454,6 +454,7 @@ "devDependencies": { "chai": "^3.3.0", "mocha": "^10.2.0", + "sinon": "^9.2.4", "typescript": "^5.0.4" } }, @@ -75729,6 +75730,7 @@ "lodash": "^4.17.19", "mocha": "^10.2.0", "p-map": "^4.0.0", + "sinon": "^9.2.4", "typescript": "^5.0.4" }, "dependencies": { diff --git a/services/history-v1/storage/lib/blob_store/index.js b/services/history-v1/storage/lib/blob_store/index.js index 0be6ac29db..b4e8cb3929 100644 --- a/services/history-v1/storage/lib/blob_store/index.js +++ b/services/history-v1/storage/lib/blob_store/index.js @@ -32,13 +32,14 @@ function makeProjectKey(projectId, hash) { return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}` } -async function uploadBlob(projectId, blob, stream) { +async function uploadBlob(projectId, blob, stream, opts = {}) { const bucket = config.get('blobStore.projectBucket') const key = makeProjectKey(projectId, blob.getHash()) logger.debug({ projectId, blob }, 'uploadBlob started') try { await 
persistor.sendStream(bucket, key, stream, { contentType: 'application/octet-stream', + ...opts, }) } finally { logger.debug({ projectId, blob }, 'uploadBlob finished') @@ -162,7 +163,7 @@ class BlobStore { * string content. * * @param {string} string - * @return {Promise.} + * @return {Promise.} */ async putString(string) { assert.string(string, 'bad string') @@ -185,7 +186,7 @@ class BlobStore { * temporary file). * * @param {string} pathname - * @return {Promise.} + * @return {Promise.} */ async putFile(pathname) { assert.string(pathname, 'bad pathname') @@ -205,6 +206,27 @@ class BlobStore { } /** + * Stores an object as a gzipped JSON string in a blob. + * + * @param {object} obj + * @returns {Promise.} + */ + async putObject(obj) { + assert.object(obj, 'bad object') + const string = JSON.stringify(obj) + const hash = blobHash.fromString(string) + const stream = await streams.gzipStringToStream(string) + const newBlob = new Blob(hash, Buffer.byteLength(string), string.length) + await uploadBlob(this.projectId, newBlob, stream, { + contentEncoding: 'gzip', + contentType: 'application/json', + }) + await this.backend.insertBlob(this.projectId, newBlob) + return newBlob + } + + /** + * * Fetch a blob's content by its hash as a UTF-8 encoded string. * * @param {string} hash hexadecimal SHA-1 hash @@ -224,6 +246,27 @@ class BlobStore { } } + /** + * Fetch a JSON encoded gzipped blob by its hash, decompress and deserialize + * it. 
+ * + * @template [T=unknown] + * @param {string} hash hexadecimal SHA-1 hash + * @return {Promise.} promise for the content of the file + */ + async getObject(hash) { + assert.blobHash(hash, 'bad hash') + const projectId = this.projectId + logger.debug({ projectId, hash }, 'getObject started') + try { + const stream = await this.getStream(hash) + const buffer = await streams.gunzipStreamToBuffer(stream) + return JSON.parse(buffer.toString()) + } finally { + logger.debug({ projectId, hash }, 'getObject finished') + } + } + /** * Fetch a blob by its hash as a stream. * @@ -252,7 +295,7 @@ class BlobStore { * Read a blob metadata record by hexadecimal hash. * * @param {string} hash hexadecimal SHA-1 hash - * @return {Promise.} + * @return {Promise.} */ async getBlob(hash) { assert.blobHash(hash, 'bad hash') diff --git a/services/history-v1/storage/lib/streams.js b/services/history-v1/storage/lib/streams.js index c36d01da10..9e216c3f97 100644 --- a/services/history-v1/storage/lib/streams.js +++ b/services/history-v1/storage/lib/streams.js @@ -10,6 +10,15 @@ const zlib = require('zlib') const { WritableBuffer, ReadableString } = require('@overleaf/stream-utils') const { pipeline } = require('stream') +/** + * Pipe a read stream to a write stream. The promise resolves when the write + * stream finishes. + * + * @function + * @param {stream.Readable} readStream + * @param {stream.Writable} writeStream + * @return {Promise} + */ function promisePipe(readStream, writeStream) { return new BPromise(function (resolve, reject) { pipeline(readStream, writeStream, function (err) { @@ -22,17 +31,15 @@ function promisePipe(readStream, writeStream) { }) } +exports.promisePipe = promisePipe + /** - * Pipe a read stream to a write stream. The promise resolves when the write - * stream finishes. + * Create a promise for the result of reading a stream to a buffer. 
* * @function * @param {stream.Readable} readStream - * @param {stream.Writable} writeStream - * @return {Promise} + * @return {Promise.} */ -exports.promisePipe = promisePipe - function readStreamToBuffer(readStream) { return new BPromise(function (resolve, reject) { const bufferStream = new WritableBuffer() @@ -46,15 +53,15 @@ function readStreamToBuffer(readStream) { }) } +exports.readStreamToBuffer = readStreamToBuffer + /** - * Create a promise for the result of reading a stream to a buffer. + * Create a promise for the result of un-gzipping a stream to a buffer. * * @function * @param {stream.Readable} readStream * @return {Promise.} */ -exports.readStreamToBuffer = readStreamToBuffer - function gunzipStreamToBuffer(readStream) { const gunzip = zlib.createGunzip() const bufferStream = new WritableBuffer() @@ -69,15 +76,15 @@ function gunzipStreamToBuffer(readStream) { }) } -/** - * Create a promise for the result of un-gzipping a stream to a buffer. - * - * @function - * @param {stream.Readable} readStream - * @return {Promise.} - */ exports.gunzipStreamToBuffer = gunzipStreamToBuffer +/** + * Create a write stream that gzips the given string. + * + * @function + * @param {string} string + * @return {Promise.} + */ function gzipStringToStream(string) { return new BPromise(function (resolve, reject) { zlib.gzip(Buffer.from(string), function (error, result) { @@ -90,11 +97,4 @@ function gzipStringToStream(string) { }) } -/** - * Create a write stream that gzips the given string. - * - * @function - * @param {string} string - * @return {Promise.} - */ exports.gzipStringToStream = gzipStringToStream