Merge pull request #21670 from overleaf/jpa-mongo-backend-types

[history-v1] add types to mongo BlobStore backend

GitOrigin-RevId: 7d91074eaa781904f7f3b56390aacee1800a7f67
This commit is contained in:
Jakob Ackermann 2024-11-18 17:08:59 +01:00 committed by Copybot
parent 087b612e16
commit 27076c50cc
8 changed files with 56 additions and 12 deletions

View file

@ -94,7 +94,7 @@ class Blob {
/** /**
* Utf-8 length of the blob content, if it appears to be valid UTF-8. * Utf-8 length of the blob content, if it appears to be valid UTF-8.
* @return {?number} * @return {number|undefined}
*/ */
getStringLength() { getStringLength() {
return this.stringLength return this.stringLength

View file

@ -10,7 +10,7 @@ const Change = require('./change')
class ChangeNote { class ChangeNote {
/** /**
* @param {number} baseVersion the new base version for the change * @param {number} baseVersion the new base version for the change
* @param {?Change} change * @param {Change} [change]
*/ */
constructor(baseVersion, change) { constructor(baseVersion, change) {
assert.integer(baseVersion, 'bad baseVersion') assert.integer(baseVersion, 'bad baseVersion')

View file

@ -95,7 +95,7 @@ class File {
/** /**
* @param {number} byteLength * @param {number} byteLength
* @param {number?} stringLength * @param {number} [stringLength]
* @param {Object} [metadata] * @param {Object} [metadata]
* @return {File} * @return {File}
*/ */

View file

@ -47,7 +47,7 @@ class FileData {
/** @see File.createHollow /** @see File.createHollow
* @param {number} byteLength * @param {number} byteLength
* @param {number|null} stringLength * @param {number} [stringLength]
*/ */
static createHollow(byteLength, stringLength) { static createHollow(byteLength, stringLength) {
if (stringLength == null) { if (stringLength == null) {

View file

@ -27,7 +27,7 @@ class V2DocVersions {
} }
/** /**
* @return {?RawV2DocVersions} * @return {RawV2DocVersions|null}
*/ */
toRaw() { toRaw() {
if (!this.data) return null if (!this.data) return null

View file

@ -1,3 +1,4 @@
// @ts-check
/** /**
* Mongo backend for the blob store. * Mongo backend for the blob store.
* *
@ -15,7 +16,7 @@
*/ */
const { Blob } = require('overleaf-editor-core') const { Blob } = require('overleaf-editor-core')
const { ObjectId, Binary } = require('mongodb') const { ObjectId, Binary, MongoError } = require('mongodb')
const assert = require('../assert') const assert = require('../assert')
const mongodb = require('../mongodb') const mongodb = require('../mongodb')
@ -24,6 +25,7 @@ const DUPLICATE_KEY_ERROR_CODE = 11000
/** /**
* Set up the data structures for a given project. * Set up the data structures for a given project.
* @param {string} projectId
*/ */
async function initialize(projectId) { async function initialize(projectId) {
assert.mongoId(projectId, 'bad projectId') assert.mongoId(projectId, 'bad projectId')
@ -33,14 +35,18 @@ async function initialize(projectId) {
blobs: {}, blobs: {},
}) })
} catch (err) { } catch (err) {
if (err.code !== DUPLICATE_KEY_ERROR_CODE) { if (err instanceof MongoError && err.code === DUPLICATE_KEY_ERROR_CODE) {
throw err return // ignore already initialized case
} }
throw err
} }
} }
/** /**
* Return blob metadata for the given project and hash. * Return blob metadata for the given project and hash.
* @param {string} projectId
* @param {string} hash
* @return {Promise<Blob | null>}
*/ */
async function findBlob(projectId, hash) { async function findBlob(projectId, hash) {
assert.mongoId(projectId, 'bad projectId') assert.mongoId(projectId, 'bad projectId')
@ -69,6 +75,9 @@ async function findBlob(projectId, hash) {
/** /**
* Search in the sharded collection for blob metadata * Search in the sharded collection for blob metadata
* @param {string} projectId
* @param {string} hash
* @return {Promise<Blob | null>}
*/ */
async function findBlobSharded(projectId, hash) { async function findBlobSharded(projectId, hash) {
const [shard, bucket] = getShardedBucket(hash) const [shard, bucket] = getShardedBucket(hash)
@ -81,11 +90,15 @@ async function findBlobSharded(projectId, hash) {
return null return null
} }
const record = result.blobs.find(blob => blob.h.toString('hex') === hash) const record = result.blobs.find(blob => blob.h.toString('hex') === hash)
if (!record) return null
return recordToBlob(record) return recordToBlob(record)
} }
/** /**
* Read multiple blob metadata records by hexadecimal hashes. * Read multiple blob metadata records by hexadecimal hashes.
* @param {string} projectId
* @param {Array<string>} hashes
* @return {Promise<Array<Blob>>}
*/ */
async function findBlobs(projectId, hashes) { async function findBlobs(projectId, hashes) {
assert.mongoId(projectId, 'bad projectId') assert.mongoId(projectId, 'bad projectId')
@ -135,6 +148,9 @@ async function findBlobs(projectId, hashes) {
/** /**
* Search in the sharded collection for blob metadata. * Search in the sharded collection for blob metadata.
* @param {string} projectId
* @param {Set<string>} hashSet
* @return {Promise<Array<Blob>>}
*/ */
async function findBlobsSharded(projectId, hashSet) { async function findBlobsSharded(projectId, hashSet) {
// Build a map of buckets by shard key // Build a map of buckets by shard key
@ -183,6 +199,8 @@ async function findBlobsSharded(projectId, hashSet) {
/** /**
* Add a blob's metadata to the blobs collection after it has been uploaded. * Add a blob's metadata to the blobs collection after it has been uploaded.
* @param {string} projectId
* @param {Blob} blob
*/ */
async function insertBlob(projectId, blob) { async function insertBlob(projectId, blob) {
assert.mongoId(projectId, 'bad projectId') assert.mongoId(projectId, 'bad projectId')
@ -208,6 +226,10 @@ async function insertBlob(projectId, blob) {
/** /**
* Add a blob's metadata to the sharded blobs collection. * Add a blob's metadata to the sharded blobs collection.
* @param {string} projectId
* @param {string} hash
* @param {Record} record
* @return {Promise<void>}
*/ */
async function insertRecordSharded(projectId, hash, record) { async function insertRecordSharded(projectId, hash, record) {
const [shard, bucket] = getShardedBucket(hash) const [shard, bucket] = getShardedBucket(hash)
@ -221,6 +243,7 @@ async function insertRecordSharded(projectId, hash, record) {
/** /**
* Delete all blobs for a given project. * Delete all blobs for a given project.
* @param {string} projectId
*/ */
async function deleteBlobs(projectId) { async function deleteBlobs(projectId) {
assert.mongoId(projectId, 'bad projectId') assert.mongoId(projectId, 'bad projectId')
@ -228,12 +251,15 @@ async function deleteBlobs(projectId) {
const minShardedId = makeShardedId(projectId, '0') const minShardedId = makeShardedId(projectId, '0')
const maxShardedId = makeShardedId(projectId, 'f') const maxShardedId = makeShardedId(projectId, 'f')
await mongodb.shardedBlobs.deleteMany({ await mongodb.shardedBlobs.deleteMany({
// @ts-ignore We are using a custom _id here.
_id: { $gte: minShardedId, $lte: maxShardedId }, _id: { $gte: minShardedId, $lte: maxShardedId },
}) })
} }
/** /**
* Return the Mongo path to the bucket for the given hash. * Return the Mongo path to the bucket for the given hash.
* @param {string} hash
* @return {string}
*/ */
function getBucket(hash) { function getBucket(hash) {
return `blobs.${hash.slice(0, 3)}` return `blobs.${hash.slice(0, 3)}`
@ -242,6 +268,8 @@ function getBucket(hash) {
/** /**
* Return the shard key and Mongo path to the bucket for the given hash in the * Return the shard key and Mongo path to the bucket for the given hash in the
* sharded collection. * sharded collection.
* @param {string} hash
* @return {[string, string]}
*/ */
function getShardedBucket(hash) { function getShardedBucket(hash) {
const shard = hash.slice(0, 1) const shard = hash.slice(0, 1)
@ -251,13 +279,25 @@ function getShardedBucket(hash) {
/**
 * Create an _id key for the sharded collection.
 *
 * The key is built from the hex string `${projectId}0${shard}`, which is
 * decoded into bytes and wrapped in a BSON Binary.
 *
 * @param {string} projectId hex-encoded project id
 * @param {string} shard hex-encoded shard key (deleteBlobs passes '0' and 'f'
 *   as the lowest and highest shard)
 * @return {Binary}
 */
function makeShardedId(projectId, shard) {
  // The literal '0' between the two parts is part of the key layout; keep it.
  const hexKey = `${projectId}0${shard}`
  const keyBytes = Buffer.from(hexKey, 'hex')
  return new Binary(keyBytes)
}
/**
* @typedef {Object} Record
* @property {Binary} h
* @property {number} b
* @property {number} [s]
*/
/** /**
* Return the Mongo record for the given blob. * Return the Mongo record for the given blob.
* @param {Blob} blob
* @return {Record}
*/ */
function blobToRecord(blob) { function blobToRecord(blob) {
const hash = blob.getHash() const hash = blob.getHash()
@ -272,11 +312,10 @@ function blobToRecord(blob) {
/**
 * Create a blob from the given Mongo record.
 *
 * The record must be present: this function no longer accepts null or
 * undefined, so callers that may have no record (e.g. a failed lookup) have
 * to handle that case before calling.
 *
 * @param {Record} record stored record; `h` is converted to a hex string and
 *   passed to the Blob constructor together with `b` and the optional `s`
 * @return {Blob}
 */
function recordToBlob(record) {
  const { h: hashBinary, b, s } = record
  return new Blob(hashBinary.toString('hex'), b, s)
}

View file

@ -49,7 +49,7 @@ class ProjectArchive {
/** /**
* @constructor * @constructor
* @param {Snapshot} snapshot * @param {Snapshot} snapshot
* @param {?number} timeout in ms * @param {number} [timeout] in ms
* @classdesc * @classdesc
* Writes the project snapshot to a zip file. * Writes the project snapshot to a zip file.
*/ */

View file

@ -88,6 +88,11 @@ describe('BlobStore', function () {
await blobStore2.initialize() await blobStore2.initialize()
}) })
it('can initialize a project again without throwing an error', async function () {
  // Run initialize() on both stores, one after the other; the test passes as
  // long as neither call rejects.
  for (const store of [blobStore, blobStore2]) {
    await store.initialize()
  }
})
it('can store and fetch string content', async function () { it('can store and fetch string content', async function () {
function checkBlob(blob) { function checkBlob(blob) {
expect(blob.getHash()).to.equal(helloWorldHash) expect(blob.getHash()).to.equal(helloWorldHash)