Avoid duplicating a math-closing dollar sign (#11227)

GitOrigin-RevId: ef2ef77e26df59d1af3df6dc664e284d3c70102d
Alf Eaton 2023-01-13 12:42:29 +00:00 committed by Copybot
parent 377a8aed60
commit ee85d948e2
268 changed files with 57782 additions and 0 deletions

View file

@ -0,0 +1 @@
node_modules/

View file

@ -0,0 +1,5 @@
/coverage
/node_modules
# managed by monorepo$ bin/update_build_scripts
.npmrc

View file

@ -0,0 +1 @@
16.17.1

View file

@ -0,0 +1,9 @@
overleaf-editor-core
--dependencies=None
--docker-repos=gcr.io/overleaf-ops
--env-add=
--env-pass-through=
--is-library=True
--node-version=16.17.1
--public-repo=False
--script-version=4.1.0

View file

@ -0,0 +1,26 @@
exports.Author = require('./lib/author')
exports.AuthorList = require('./lib/author_list')
exports.Blob = require('./lib/blob')
exports.Change = require('./lib/change')
exports.ChangeRequest = require('./lib/change_request')
exports.ChangeNote = require('./lib/change_note')
exports.Chunk = require('./lib/chunk')
exports.ChunkResponse = require('./lib/chunk_response')
exports.File = require('./lib/file')
exports.FileMap = require('./lib/file_map')
exports.History = require('./lib/history')
exports.Label = require('./lib/label')
exports.AddFileOperation = require('./lib/operation/add_file_operation')
exports.MoveFileOperation = require('./lib/operation/move_file_operation')
exports.EditFileOperation = require('./lib/operation/edit_file_operation')
exports.SetFileMetadataOperation = require('./lib/operation/set_file_metadata_operation')
exports.NoOperation = require('./lib/operation/no_operation')
exports.Operation = require('./lib/operation')
exports.RestoreOrigin = require('./lib/origin/restore_origin')
exports.Origin = require('./lib/origin')
exports.OtClient = require('./lib/ot_client')
exports.TextOperation = require('./lib/operation/text_operation')
exports.safePathname = require('./lib/safe_pathname')
exports.Snapshot = require('./lib/snapshot')
exports.util = require('./lib/util')
exports.V2DocVersions = require('./lib/v2_doc_versions')

View file

@ -0,0 +1,70 @@
'use strict'
const assert = require('check-types').assert
/**
* @constructor
* @param {number} id
* @param {string} email
* @param {string} name
* @classdesc
* An author of a {@link Change}. We want to store user IDs, and then fill in
* the other properties (which the user can change over time) when changes are
* loaded.
*
* At present, we're assuming that all authors have a user ID; we may need to
* generalise this to cover users for whom we only have a name and email, e.g.
* from git. For now, though, this seems to do what we need.
*/
function Author(id, email, name) {
assert.number(id, 'bad id')
assert.string(email, 'bad email')
assert.string(name, 'bad name')
this.id = id
this.email = email
this.name = name
}
/**
* Create an Author from its raw form.
*
* @param {Object} [raw]
* @return {Author | null}
*/
Author.fromRaw = function authorFromRaw(raw) {
if (!raw) return null
return new Author(raw.id, raw.email, raw.name)
}
/**
* Convert the Author to raw form for storage or transmission.
*
* @return {Object}
*/
Author.prototype.toRaw = function authorToRaw() {
return { id: this.id, email: this.email, name: this.name }
}
/**
* @return {number}
*/
Author.prototype.getId = function () {
return this.id
}
/**
* @return {string}
*/
Author.prototype.getEmail = function () {
return this.email
}
/**
* @return {string}
*/
Author.prototype.getName = function () {
return this.name
}
module.exports = Author
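
A minimal usage sketch of the Author class above, assuming the library is loaded through its index as overleaf-editor-core; the id, email and name are illustrative:

const { Author } = require('overleaf-editor-core')
const alice = new Author(1, 'alice@example.com', 'Alice')
const raw = alice.toRaw() // { id: 1, email: 'alice@example.com', name: 'Alice' }
Author.fromRaw(raw).getEmail() // 'alice@example.com'
Author.fromRaw(null) // null: missing raw data yields no author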

View file

@ -0,0 +1,45 @@
/** @module */
'use strict'
const _ = require('lodash')
const check = require('check-types')
const Author = require('./author')
/**
* Check that every member of the list is a number or every member is
* an Author value, disregarding null or undefined values.
*
* @param {Array.<number|Author>} authors author list
* @param {string} msg
*/
function assertV1(authors, msg) {
const authors_ = authors.filter(function (a) {
return a !== null && a !== undefined
})
if (authors_.length > 0) {
const checker = check.integer(authors_[0])
? check.assert.integer
: _.partial(check.assert.instance, _, Author)
_.each(authors_, function (author) {
checker(author, msg)
})
}
}
/**
* Check that every member of the list is a v2 author ID, disregarding
* null or undefined values.
*
* @param {Array.<string>} authors author list
* @param {string} msg
*/
function assertV2(authors, msg) {
_.each(authors, function (author) {
check.assert.maybe.match(author, /^[0-9a-f]{24}$/, msg)
})
}
module.exports = { assertV1: assertV1, assertV2: assertV2 }
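
A short sketch of the two assertion helpers above; the ids are illustrative:

const { AuthorList } = require('overleaf-editor-core')
AuthorList.assertV1([1, 2, null], 'bad v1 authors') // passes: integers, nulls are ignored
AuthorList.assertV2(['5f0c9a7b8e1d4c3b2a1f0e9d', null], 'bad v2 authors') // passes: 24-char hex ids
AuthorList.assertV1([1, 'not-an-id'], 'bad v1 authors') // throws: members must all share one type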

View file

@ -0,0 +1,100 @@
'use strict'
const assert = require('check-types').assert
const OError = require('@overleaf/o-error')
const TextOperation = require('./operation/text_operation')
/**
* @constructor
* @param {string} [hash]
* @param {number} [byteLength]
* @param {number} [stringLength]
* @classdesc
* Metadata record for the content of a file.
*/
function Blob(hash, byteLength, stringLength) {
this.setHash(hash)
this.setByteLength(byteLength)
this.setStringLength(stringLength)
}
class NotFoundError extends OError {
constructor(hash) {
super(`blob ${hash} not found`, { hash })
this.hash = hash
}
}
Blob.NotFoundError = NotFoundError
Blob.HEX_HASH_RX_STRING = '^[0-9a-f]{40,40}$'
Blob.HEX_HASH_RX = new RegExp(Blob.HEX_HASH_RX_STRING)
module.exports = Blob
Blob.fromRaw = function blobFromRaw(raw) {
if (raw) {
return new Blob(raw.hash, raw.byteLength, raw.stringLength)
}
return null
}
Blob.prototype.toRaw = function blobToRaw() {
return {
hash: this.hash,
byteLength: this.byteLength,
stringLength: this.stringLength,
}
}
/**
* Hex hash.
* @return {?String}
*/
Blob.prototype.getHash = function () {
return this.hash
}
Blob.prototype.setHash = function (hash) {
assert.maybe.match(hash, Blob.HEX_HASH_RX, 'bad hash')
this.hash = hash
}
/**
* Length of the blob in bytes.
* @return {number}
*/
Blob.prototype.getByteLength = function () {
return this.byteLength
}
Blob.prototype.setByteLength = function (byteLength) {
assert.maybe.integer(byteLength, 'bad byteLength')
this.byteLength = byteLength
}
/**
* Utf-8 length of the blob content, if it appears to be valid UTF-8.
* @return {?number}
*/
Blob.prototype.getStringLength = function () {
return this.stringLength
}
Blob.prototype.setStringLength = function (stringLength) {
assert.maybe.integer(stringLength, 'bad stringLength')
this.stringLength = stringLength
}
/**
* Size of the largest file that we'll read to determine whether we can edit it
* or not, in bytes. The final decision on whether a file is editable or not is
* based on the number of characters it contains, but we need to read the file
* in to determine that; so it is useful to have an upper bound on the byte
* length of a file that might be editable.
*
* The reason for the factor of 3 is as follows. We cannot currently edit files
* that contain characters outside of the basic multilingual plane, so we're
* limited to characters that can be represented in a single, two-byte UCS-2
* code unit. Encoding the largest such value, 0xFFFF (which is not actually
* a valid character), takes three bytes in UTF-8: 0xEF 0xBF 0xBF. A file
* composed entirely of three-byte UTF-8 codepoints is the worst case; in
* practice, this is a very conservative upper bound.
*
* @type {number}
*/
Blob.MAX_EDITABLE_BYTE_LENGTH_BOUND = 3 * TextOperation.MAX_STRING_LENGTH
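
A quick check of the factor of 3 described in the comment above, in plain Node:

// U+FFFF is the largest single UCS-2 code unit; it takes three bytes in UTF-8.
Buffer.byteLength('\uFFFF', 'utf8') // 3
// So a file of N such characters needs at most 3 * N bytes.
Buffer.byteLength('\uFFFF'.repeat(1000), 'utf8') // 3000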

View file

@ -0,0 +1,333 @@
'use strict'
const _ = require('lodash')
const assert = require('check-types').assert
const BPromise = require('bluebird')
const AuthorList = require('./author_list')
const Operation = require('./operation')
const Origin = require('./origin')
const Snapshot = require('./snapshot')
const FileMap = require('./file_map')
const V2DocVersions = require('./v2_doc_versions')
/**
* @typedef {import("./author")} Author
* @typedef {import("./types").BlobStore} BlobStore
*/
/**
* @classdesc
* A Change is a list of {@link Operation}s applied atomically by given
* {@link Author}(s) at a given time.
*/
class Change {
/**
* @constructor
* @param {Array.<Operation>} operations
* @param {Date} timestamp
* @param {number[] | Author[]} [authors]
* @param {Origin} [origin]
* @param {string[]} [v2Authors]
* @param {string} [projectVersion]
* @param {V2DocVersions} [v2DocVersions]
*/
constructor(
operations,
timestamp,
authors,
origin,
v2Authors,
projectVersion,
v2DocVersions
) {
this.setOperations(operations)
this.setTimestamp(timestamp)
this.setAuthors(authors || [])
this.setOrigin(origin)
this.setV2Authors(v2Authors || [])
this.setProjectVersion(projectVersion)
this.setV2DocVersions(v2DocVersions)
}
/**
* For serialization.
*
* @return {Object}
*/
toRaw() {
function toRaw(object) {
return object.toRaw()
}
const raw = {
operations: this.operations.map(toRaw),
timestamp: this.timestamp.toISOString(),
authors: this.authors,
}
if (this.v2Authors) raw.v2Authors = this.v2Authors
if (this.origin) raw.origin = this.origin.toRaw()
if (this.projectVersion) raw.projectVersion = this.projectVersion
if (this.v2DocVersions) raw.v2DocVersions = this.v2DocVersions.toRaw()
return raw
}
static fromRaw(raw) {
if (!raw) return null
assert.array.of.object(raw.operations, 'bad raw.operations')
assert.nonEmptyString(raw.timestamp, 'bad raw.timestamp')
// Hack to clean up bad data where author id of some changes was 0, instead of
// null. The root cause of the bug is fixed in
// https://github.com/overleaf/write_latex/pull/3804 but the bad data persists
// on S3
let authors
if (raw.authors) {
authors = raw.authors.map(
// Null represents an anonymous author
author => (author === 0 ? null : author)
)
}
return new Change(
raw.operations.map(Operation.fromRaw),
new Date(raw.timestamp),
authors,
raw.origin && Origin.fromRaw(raw.origin),
raw.v2Authors,
raw.projectVersion,
raw.v2DocVersions && V2DocVersions.fromRaw(raw.v2DocVersions)
)
}
getOperations() {
return this.operations
}
setOperations(operations) {
assert.array.of.object(operations, 'Change: bad operations')
this.operations = operations
}
getTimestamp() {
return this.timestamp
}
setTimestamp(timestamp) {
assert.date(timestamp, 'Change: bad timestamp')
this.timestamp = timestamp
}
/**
* @return {Array.<Author>} zero or more
*/
getAuthors() {
return this.authors
}
setAuthors(authors) {
assert.array(authors, 'Change: bad author ids array')
if (authors.length > 1) {
assert.maybe.emptyArray(
this.v2Authors,
'Change: cannot set v1 authors if v2 authors is set'
)
}
AuthorList.assertV1(authors, 'Change: bad author ids')
this.authors = authors
}
/**
* @return {Array.<Author>} zero or more
*/
getV2Authors() {
return this.v2Authors
}
setV2Authors(v2Authors) {
assert.array(v2Authors, 'Change: bad v2 author ids array')
if (v2Authors.length > 1) {
assert.maybe.emptyArray(
this.authors,
'Change: cannot set v2 authors if v1 authors is set'
)
}
AuthorList.assertV2(v2Authors, 'Change: not a v2 author id')
this.v2Authors = v2Authors
}
/**
* @return {Origin | null | undefined}
*/
getOrigin() {
return this.origin
}
setOrigin(origin) {
assert.maybe.instance(origin, Origin, 'Change: bad origin')
this.origin = origin
}
/**
* @return {string | null | undefined}
*/
getProjectVersion() {
return this.projectVersion
}
setProjectVersion(projectVersion) {
assert.maybe.match(
projectVersion,
Change.PROJECT_VERSION_RX,
'Change: bad projectVersion'
)
this.projectVersion = projectVersion
}
/**
* @return {V2DocVersions | null | undefined}
*/
getV2DocVersions() {
return this.v2DocVersions
}
setV2DocVersions(v2DocVersions) {
assert.maybe.instance(
v2DocVersions,
V2DocVersions,
'Change: bad v2DocVersions'
)
this.v2DocVersions = v2DocVersions
}
/**
* If this Change references blob hashes, add them to the given set.
*
* @param {Set.<String>} blobHashes
*/
findBlobHashes(blobHashes) {
for (const operation of this.operations) {
operation.findBlobHashes(blobHashes)
}
}
/**
* If this Change contains any File objects, load them.
*
* @param {string} kind see {File#load}
* @param {BlobStore} blobStore
* @return {Promise}
*/
loadFiles(kind, blobStore) {
return BPromise.each(this.operations, operation =>
operation.loadFiles(kind, blobStore)
)
}
/**
* Append an operation to the end of the operations list.
*
* @param {Operation} operation
* @return {this}
*/
pushOperation(operation) {
this.getOperations().push(operation)
return this
}
/**
* Apply this change to a snapshot. All operations are applied, and then the
* snapshot version is increased.
*
* Recoverable errors (caused by historical bad data) are ignored unless
* opts.strict is true
*
* @param {Snapshot} snapshot modified in place
* @param {object} opts
* @param {boolean} [opts.strict] - Do not ignore recoverable errors
*/
applyTo(snapshot, opts = {}) {
assert.object(snapshot, 'bad snapshot')
for (const operation of this.operations) {
try {
operation.applyTo(snapshot, opts)
} catch (err) {
const recoverable =
err instanceof Snapshot.EditMissingFileError ||
err instanceof FileMap.FileNotFoundError
if (!recoverable || opts.strict) {
throw err
}
}
}
// update project version if present in change
if (this.projectVersion) {
snapshot.setProjectVersion(this.projectVersion)
}
// update doc versions
if (this.v2DocVersions) {
snapshot.updateV2DocVersions(this.v2DocVersions)
}
}
/**
* Transform this change to account for the fact that the other change occurred
* simultaneously and was applied first.
*
* This change is modified in place (by transforming its operations).
*
* @param {Change} other
*/
transformAfter(other) {
assert.object(other, 'bad other')
const thisOperations = this.getOperations()
const otherOperations = other.getOperations()
for (let i = 0; i < otherOperations.length; ++i) {
for (let j = 0; j < thisOperations.length; ++j) {
thisOperations[j] = Operation.transform(
thisOperations[j],
otherOperations[i]
)[0]
}
}
}
clone() {
return Change.fromRaw(this.toRaw())
}
store(blobStore, concurrency) {
assert.maybe.number(concurrency, 'bad concurrency')
const raw = this.toRaw()
raw.authors = _.uniq(raw.authors)
return BPromise.map(
this.operations,
operation => operation.store(blobStore),
{ concurrency: concurrency || 1 }
).then(rawOperations => {
raw.operations = rawOperations
return raw
})
}
canBeComposedWith(other) {
const operations = this.getOperations()
const otherOperations = other.getOperations()
// We ignore complex changes with more than 1 operation
if (operations.length > 1 || otherOperations.length > 1) return false
return operations[0].canBeComposedWith(otherOperations[0])
}
}
Change.PROJECT_VERSION_RX_STRING = '^[0-9]+\\.[0-9]+$'
Change.PROJECT_VERSION_RX = new RegExp(Change.PROJECT_VERSION_RX_STRING)
module.exports = Change
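
A minimal sketch of building and serializing a Change from a single AddFileOperation, using only constructors defined in this library; the pathname, content and timestamp are illustrative:

const { Change, AddFileOperation, File } = require('overleaf-editor-core')
const change = new Change(
  [new AddFileOperation('main.tex', File.fromString('\\documentclass{article}'))],
  new Date(),
  [] // v1 authors; empty here
)
const raw = change.toRaw() // plain object: raw operations, ISO timestamp, authors
Change.fromRaw(raw).getOperations().length // 1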

View file

@ -0,0 +1,60 @@
'use strict'
const assert = require('check-types').assert
const Change = require('./change')
/**
* @constructor
* @param {number} baseVersion the new base version for the change
* @param {?Change} change
* @classdesc
* A `ChangeNote` is returned when the server has applied a {@link Change}.
*/
function ChangeNote(baseVersion, change) {
assert.integer(baseVersion, 'bad baseVersion')
assert.maybe.instance(change, Change, 'bad change')
this.baseVersion = baseVersion
this.change = change
}
module.exports = ChangeNote
/**
* For serialization.
*
* @return {Object}
*/
ChangeNote.prototype.toRaw = function changeNoteToRaw() {
return {
baseVersion: this.baseVersion,
change: this.change.toRaw(),
}
}
ChangeNote.prototype.toRawWithoutChange =
function changeNoteToRawWithoutChange() {
return {
baseVersion: this.baseVersion,
}
}
ChangeNote.fromRaw = function changeNoteFromRaw(raw) {
assert.integer(raw.baseVersion, 'bad raw.baseVersion')
assert.maybe.object(raw.change, 'bad raw.changes')
return new ChangeNote(raw.baseVersion, Change.fromRaw(raw.change))
}
ChangeNote.prototype.getBaseVersion = function () {
return this.baseVersion
}
ChangeNote.prototype.getResultVersion = function () {
return this.baseVersion + 1
}
ChangeNote.prototype.getChange = function () {
return this.change
}

View file

@ -0,0 +1,90 @@
'use strict'
const assert = require('check-types').assert
const AuthorList = require('./author_list')
const Change = require('./change')
const Operation = require('./operation')
/**
* @typedef {import("./author")} Author
*/
/**
* @constructor
* @param {number} baseVersion
* @param {Array.<Operation>} operations
* @param {boolean} [untransformable]
* @param {number[] | Author[]} [authors]
* @classdesc
* A `ChangeRequest` is a list of {@link Operation}s that the server can apply
* as a {@link Change}.
*
* If the change is marked as `untransformable`, then the server will not
* attempt to transform it if it is out of date (i.e. if the baseVersion no
* longer matches the project's latest version). For example, if the client
* needs to ensure that a metadata property is set on exactly one file, it can't
* do that reliably if there's a chance that other clients will also change the
* metadata at the same time. The expectation is that if the change is rejected,
* the client will retry on a later version.
*/
function ChangeRequest(baseVersion, operations, untransformable, authors) {
assert.integer(baseVersion, 'bad baseVersion')
assert.array.of.object(operations, 'bad operations')
assert.maybe.boolean(untransformable, 'ChangeRequest: bad untransformable')
// TODO remove authors once we have JWTs working --- pass as parameter to
// makeChange instead
authors = authors || []
// check all are the same type
AuthorList.assertV1(authors, 'bad authors')
this.authors = authors
this.baseVersion = baseVersion
this.operations = operations
this.untransformable = untransformable || false
}
module.exports = ChangeRequest
/**
* For serialization.
*
* @return {Object}
*/
ChangeRequest.prototype.toRaw = function changeRequestToRaw() {
function operationToRaw(operation) {
return operation.toRaw()
}
return {
baseVersion: this.baseVersion,
operations: this.operations.map(operationToRaw),
untransformable: this.untransformable,
authors: this.authors,
}
}
ChangeRequest.fromRaw = function changeRequestFromRaw(raw) {
assert.array.of.object(raw.operations, 'bad raw.operations')
return new ChangeRequest(
raw.baseVersion,
raw.operations.map(Operation.fromRaw),
raw.untransformable,
raw.authors
)
}
ChangeRequest.prototype.getBaseVersion = function () {
return this.baseVersion
}
ChangeRequest.prototype.isUntransformable = function () {
return this.untransformable
}
ChangeRequest.prototype.makeChange = function changeRequestMakeChange(
timestamp
) {
return new Change(this.operations, timestamp, this.authors)
}
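
A sketch of an untransformable request, as described in the class comment; the base version and pathname are illustrative:

const { ChangeRequest, AddFileOperation, File } = require('overleaf-editor-core')
const request = new ChangeRequest(
  42, // base version the client last saw
  [new AddFileOperation('refs.bib', File.fromString(''))],
  true // untransformable: reject rather than rebase if version 42 is stale
)
request.isUntransformable() // true
const change = request.makeChange(new Date()) // the Change the server applies on success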

View file

@ -0,0 +1,166 @@
'use strict'
const assert = require('check-types').assert
const OError = require('@overleaf/o-error')
const History = require('./history')
/**
* @typedef {import("./types").BlobStore} BlobStore
* @typedef {import("./change")} Change
* @typedef {import("./snapshot")} Snapshot
*/
/**
* @constructor
* @param {History} history
* @param {number} startVersion
*
* @classdesc
* A Chunk is a {@link History} that is part of a project's overall history. It
* has a start and an end version that place its History in context.
*/
function Chunk(history, startVersion) {
assert.instance(history, History, 'bad history')
assert.integer(startVersion, 'bad startVersion')
this.history = history
this.startVersion = startVersion
}
class ConflictingEndVersion extends OError {
constructor(clientEndVersion, latestEndVersion) {
const message =
'client sent updates with end_version ' +
clientEndVersion +
' but latest chunk has end_version ' +
latestEndVersion
super(message, { clientEndVersion, latestEndVersion })
this.clientEndVersion = clientEndVersion
this.latestEndVersion = latestEndVersion
}
}
Chunk.ConflictingEndVersion = ConflictingEndVersion
class NotFoundError extends OError {
// The optional `message` and `info` arguments allow child classes to override
// these values, ensuring backwards compatibility with the previous
// implementation based on the `overleaf-error-type` library.
constructor(projectId, message, info) {
const errorMessage = message || `no chunks for project ${projectId}`
const errorInfo = info || { projectId }
super(errorMessage, errorInfo)
this.projectId = projectId
}
}
Chunk.NotFoundError = NotFoundError
class VersionNotFoundError extends NotFoundError {
constructor(projectId, version) {
super(projectId, `chunk for ${projectId} v ${version} not found`, {
projectId,
version,
})
this.projectId = projectId
this.version = version
}
}
Chunk.VersionNotFoundError = VersionNotFoundError
class BeforeTimestampNotFoundError extends NotFoundError {
constructor(projectId, timestamp) {
super(projectId, `chunk for ${projectId} timestamp ${timestamp} not found`)
this.projectId = projectId
this.timestamp = timestamp
}
}
Chunk.BeforeTimestampNotFoundError = BeforeTimestampNotFoundError
class NotPersistedError extends NotFoundError {
constructor(projectId) {
super(projectId, `chunk for ${projectId} not persisted yet`)
this.projectId = projectId
}
}
Chunk.NotPersistedError = NotPersistedError
Chunk.fromRaw = function chunkFromRaw(raw) {
return new Chunk(History.fromRaw(raw.history), raw.startVersion)
}
Chunk.prototype.toRaw = function chunkToRaw() {
return { history: this.history.toRaw(), startVersion: this.startVersion }
}
/**
* The history for this chunk.
*
* @return {History}
*/
Chunk.prototype.getHistory = function () {
return this.history
}
/**
* {@see History#getSnapshot}
* @return {Snapshot}
*/
Chunk.prototype.getSnapshot = function () {
return this.history.getSnapshot()
}
/**
* {@see History#getChanges}
* @return {Array.<Change>}
*/
Chunk.prototype.getChanges = function () {
return this.history.getChanges()
}
/**
* {@see History#pushChanges}
* @param {Array.<Change>} changes
*/
Chunk.prototype.pushChanges = function chunkPushChanges(changes) {
this.history.pushChanges(changes)
}
/**
* The version of the project after applying all changes in this chunk.
*
* @return {number} non-negative, greater than or equal to start version
*/
Chunk.prototype.getEndVersion = function chunkGetEndVersion() {
return this.startVersion + this.history.countChanges()
}
/**
* The timestamp of the last change in this chunk
*/
Chunk.prototype.getEndTimestamp = function getEndTimestamp() {
if (!this.history.countChanges()) return null
return this.history.getChanges().slice(-1)[0].getTimestamp()
}
/**
* The version of the project before applying all changes in this chunk.
*
* @return {number} non-negative, less than or equal to end version
*/
Chunk.prototype.getStartVersion = function () {
return this.startVersion
}
/**
* {@see History#loadFiles}
*
* @param {string} kind
* @param {BlobStore} blobStore
* @return {Promise}
*/
Chunk.prototype.loadFiles = function chunkLoadFiles(kind, blobStore) {
return this.history.loadFiles(kind, blobStore)
}
module.exports = Chunk
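
A sketch of the version arithmetic above; it assumes that new Snapshot() constructs an empty snapshot, which is not shown in this file:

const { Chunk, History, Snapshot } = require('overleaf-editor-core')
// Assumption: new Snapshot() builds an empty snapshot.
const chunk = new Chunk(new History(new Snapshot(), []), 10) // 10 is an illustrative start version
chunk.getStartVersion() // 10
chunk.getEndVersion()   // 10: the history holds no changes yet
chunk.getEndTimestamp() // null: there is no last change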

View file

@ -0,0 +1,32 @@
'use strict'
const assert = require('check-types').assert
const Chunk = require('./chunk')
//
// The ChunkResponse allows additional data to be sent back with the chunk;
// at present there is no extra data to send.
//
function ChunkResponse(chunk) {
assert.instance(chunk, Chunk)
this.chunk = chunk
}
ChunkResponse.prototype.toRaw = function chunkResponseToRaw() {
return {
chunk: this.chunk.toRaw(),
}
}
ChunkResponse.fromRaw = function chunkResponseFromRaw(raw) {
if (!raw) return null
return new ChunkResponse(Chunk.fromRaw(raw.chunk))
}
ChunkResponse.prototype.getChunk = function () {
return this.chunk
}
module.exports = ChunkResponse

View file

@ -0,0 +1,241 @@
'use strict'
const _ = require('lodash')
const assert = require('check-types').assert
const OError = require('@overleaf/o-error')
const FileData = require('./file_data')
const HashFileData = require('./file_data/hash_file_data')
const StringFileData = require('./file_data/string_file_data')
/**
* @typedef {import("./blob")} Blob
* @typedef {import("./types").BlobStore} BlobStore
* @typedef {import("./types").StringFileRawData} StringFileRawData
* @typedef {import("./operation/text_operation")} TextOperation
*/
/**
* @template T
* @typedef {import("bluebird")<T>} BPromise
*/
/**
* @constructor
* @param {FileData} data
* @param {Object} [metadata]
*
* @classdesc
* A file in a {@link Snapshot}. A file has both data and metadata. There
* are several classes of data that represent the various types of file
* data that are supported, namely text and binary, and also the various
* states that a file's data can be in, namely:
*
* 1. Hash only: all we know is the file's hash; this is how we encode file
* content in long term storage.
* 1. Lazily loaded: the hash of the file, its length, and its type are known,
* but its content is not loaded. Operations are cached for application
* later.
* 1. Eagerly loaded: the content of a text file is fully loaded into memory
* as a string.
* 1. Hollow: only the byte and/or UTF-8 length of the file are known; this is
* used to allow for validation of operations when editing collaboratively
* without having to keep file data in memory on the server.
*/
function File(data, metadata) {
assert.instance(data, FileData, 'File: bad data')
this.data = data
this.setMetadata(metadata || {})
}
File.fromRaw = function fileFromRaw(raw) {
if (!raw) return null
return new File(FileData.fromRaw(raw), raw.metadata)
}
/**
* @param {string} hash
* @param {Object} [metadata]
* @return {File}
*/
File.fromHash = function fileFromHash(hash, metadata) {
return new File(new HashFileData(hash), metadata)
}
/**
* @param {string} string
* @param {Object} [metadata]
* @return {File}
*/
File.fromString = function fileFromString(string, metadata) {
return new File(new StringFileData(string), metadata)
}
/**
* @param {number} [byteLength]
* @param {number} [stringLength]
* @param {Object} [metadata]
* @return {File}
*/
File.createHollow = function fileCreateHollow(
byteLength,
stringLength,
metadata
) {
return new File(FileData.createHollow(byteLength, stringLength), metadata)
}
/**
* @param {Blob} blob
* @param {Object} [metadata]
* @return {File}
*/
File.createLazyFromBlob = function fileCreateLazyFromBlob(blob, metadata) {
return new File(FileData.createLazyFromBlob(blob), metadata)
}
function storeRawMetadata(metadata, raw) {
if (!_.isEmpty(metadata)) {
raw.metadata = _.cloneDeep(metadata)
}
}
File.prototype.toRaw = function () {
const rawFileData = this.data.toRaw()
storeRawMetadata(this.metadata, rawFileData)
return rawFileData
}
/**
* Hexadecimal SHA-1 hash of the file's content, if known.
*
* @return {string | null | undefined}
*/
File.prototype.getHash = function () {
return this.data.getHash()
}
/**
* The content of the file, if it is known and if this file has UTF-8 encoded
* content.
*
* @return {string | null | undefined}
*/
File.prototype.getContent = function () {
return this.data.getContent()
}
/**
* Whether this file has string content and is small enough to be edited using
* {@link TextOperation}s.
*
* @return {boolean | null | undefined} null if it is not currently known
*/
File.prototype.isEditable = function () {
return this.data.isEditable()
}
/**
* The length of the file's content in bytes, if known.
*
* @return {number | null | undefined}
*/
File.prototype.getByteLength = function () {
return this.data.getByteLength()
}
/**
* The length of the file's content in characters, if known.
*
* @return {number | null | undefined}
*/
File.prototype.getStringLength = function () {
return this.data.getStringLength()
}
/**
* Return the metadata object for this file.
*
* @return {Object}
*/
File.prototype.getMetadata = function () {
return this.metadata
}
/**
* Set the metadata object for this file.
*
* @param {Object} metadata
*/
File.prototype.setMetadata = function (metadata) {
assert.object(metadata, 'File: bad metadata')
this.metadata = metadata
}
class NotEditableError extends OError {
constructor() {
super('File is not editable')
}
}
File.NotEditableError = NotEditableError
/**
* Edit this file, if possible.
*
* @param {TextOperation} textOperation
*/
File.prototype.edit = function (textOperation) {
if (!this.data.isEditable()) throw new File.NotEditableError()
this.data.edit(textOperation)
}
/**
* Clone a file.
*
* @return {File} a new object of the same type
*/
File.prototype.clone = function fileClone() {
return File.fromRaw(this.toRaw())
}
/**
* Convert this file's data to the given kind. This may require us to load file
* size or content from the given blob store, so this is an asynchronous
* operation.
*
* @param {string} kind
* @param {BlobStore} blobStore
* @return {Promise.<File>} for this
*/
File.prototype.load = function (kind, blobStore) {
return this.data.load(kind, blobStore).then(data => {
this.data = data
return this
})
}
/**
* Store the file's content in the blob store and return a raw file with
* the corresponding hash. As a side effect, make this object consistent with
* the hash.
*
* @param {BlobStore} blobStore
* @return {BPromise<Object>} a raw HashFile
*/
File.prototype.store = function (blobStore) {
return this.data.store(blobStore).then(raw => {
storeRawMetadata(this.metadata, raw)
return raw
})
}
/**
* Blob hash for an empty file.
*
* @type {String}
*/
File.EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
module.exports = File
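
A small sketch of the eagerly loaded state described above; the metadata key is illustrative:

const { File } = require('overleaf-editor-core')
const file = File.fromString('héllo', { main: true })
file.getContent()      // 'héllo'
file.getStringLength() // 5 characters
file.getByteLength()   // 6 bytes: 'é' takes two bytes in UTF-8
file.isEditable()      // true for string content
file.clone().getMetadata() // { main: true }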

View file

@ -0,0 +1,71 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const Blob = require('../blob')
const FileData = require('./')
class BinaryFileData extends FileData {
/**
* @constructor
* @param {string} hash
* @param {number} byteLength
* @see FileData
*/
constructor(hash, byteLength) {
super()
assert.match(hash, Blob.HEX_HASH_RX, 'BinaryFileData: bad hash')
assert.integer(byteLength, 'BinaryFileData: bad byteLength')
assert.greaterOrEqual(byteLength, 0, 'BinaryFileData: low byteLength')
this.hash = hash
this.byteLength = byteLength
}
static fromRaw(raw) {
return new BinaryFileData(raw.hash, raw.byteLength)
}
/** @inheritdoc */
toRaw() {
return { hash: this.hash, byteLength: this.byteLength }
}
/** @inheritdoc */
getHash() {
return this.hash
}
/** @inheritdoc */
isEditable() {
return false
}
/** @inheritdoc */
getByteLength() {
return this.byteLength
}
/** @inheritdoc */
toEager() {
return BPromise.resolve(this)
}
/** @inheritdoc */
toLazy() {
return BPromise.resolve(this)
}
/** @inheritdoc */
toHollow() {
return BPromise.try(() => FileData.createHollow(this.byteLength, null))
}
/** @inheritdoc */
store() {
return BPromise.resolve({ hash: this.hash })
}
}
module.exports = BinaryFileData

View file

@ -0,0 +1,63 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const Blob = require('../blob')
const FileData = require('./')
class HashFileData extends FileData {
/**
* @constructor
* @param {string} hash
* @see FileData
*/
constructor(hash) {
super()
assert.match(hash, Blob.HEX_HASH_RX, 'HashFileData: bad hash')
this.hash = hash
}
static fromRaw(raw) {
return new HashFileData(raw.hash)
}
/** @inheritdoc */
toRaw() {
return { hash: this.hash }
}
/** @inheritdoc */
getHash() {
return this.hash
}
/** @inheritdoc */
toEager(blobStore) {
return this.toLazy(blobStore).then(lazyFileData =>
lazyFileData.toEager(blobStore)
)
}
/** @inheritdoc */
toLazy(blobStore) {
return blobStore.getBlob(this.hash).then(blob => {
if (!blob) throw new Error('blob not found: ' + this.hash)
return FileData.createLazyFromBlob(blob)
})
}
/** @inheritdoc */
toHollow(blobStore) {
return blobStore.getBlob(this.hash).then(function (blob) {
return FileData.createHollow(blob.getByteLength(), blob.getStringLength())
})
}
/** @inheritdoc */
store() {
return BPromise.resolve({ hash: this.hash })
}
}
module.exports = HashFileData

View file

@ -0,0 +1,46 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const FileData = require('./')
class HollowBinaryFileData extends FileData {
/**
* @constructor
* @param {number} byteLength
* @see FileData
*/
constructor(byteLength) {
super()
assert.integer(byteLength, 'HollowBinaryFileData: bad byteLength')
assert.greaterOrEqual(byteLength, 0, 'HollowBinaryFileData: low byteLength')
this.byteLength = byteLength
}
static fromRaw(raw) {
return new HollowBinaryFileData(raw.byteLength)
}
/** @inheritdoc */
toRaw() {
return { byteLength: this.byteLength }
}
/** @inheritdoc */
getByteLength() {
return this.byteLength
}
/** @inheritdoc */
isEditable() {
return false
}
/** @inheritdoc */
toHollow() {
return BPromise.resolve(this)
}
}
module.exports = HollowBinaryFileData

View file

@ -0,0 +1,55 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const FileData = require('./')
class HollowStringFileData extends FileData {
/**
* @constructor
* @param {number} stringLength
* @see FileData
*/
constructor(stringLength) {
super()
assert.integer(stringLength, 'HollowStringFileData: bad stringLength')
assert.greaterOrEqual(
stringLength,
0,
'HollowStringFileData: low stringLength'
)
this.stringLength = stringLength
}
static fromRaw(raw) {
return new HollowStringFileData(raw.stringLength)
}
/** @inheritdoc */
toRaw() {
return { stringLength: this.stringLength }
}
/** @inheritdoc */
getStringLength() {
return this.stringLength
}
/** @inheritdoc */
isEditable() {
return true
}
/** @inheritdoc */
toHollow() {
return BPromise.resolve(this)
}
/** @inheritdoc */
edit(textOperation) {
this.stringLength = textOperation.applyToLength(this.stringLength)
}
}
module.exports = HollowStringFileData

View file

@ -0,0 +1,169 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const Blob = require('../blob')
// Dependencies are loaded at the bottom of the file to mitigate circular
// dependency
let BinaryFileData = null
let HashFileData = null
let HollowBinaryFileData = null
let HollowStringFileData = null
let LazyStringFileData = null
let StringFileData = null
/**
* @typedef {import("../types").BlobStore} BlobStore
*/
/**
* @classdesc
* Helper to represent the content of a file. This class and its subclasses
* should be used only through {@link File}.
*/
class FileData {
/** @see File.fromRaw */
static fromRaw(raw) {
if (Object.prototype.hasOwnProperty.call(raw, 'hash')) {
if (Object.prototype.hasOwnProperty.call(raw, 'byteLength'))
return BinaryFileData.fromRaw(raw)
if (Object.prototype.hasOwnProperty.call(raw, 'stringLength'))
return LazyStringFileData.fromRaw(raw)
return HashFileData.fromRaw(raw)
}
if (Object.prototype.hasOwnProperty.call(raw, 'byteLength'))
return HollowBinaryFileData.fromRaw(raw)
if (Object.prototype.hasOwnProperty.call(raw, 'stringLength'))
return HollowStringFileData.fromRaw(raw)
if (Object.prototype.hasOwnProperty.call(raw, 'content'))
return StringFileData.fromRaw(raw)
throw new Error('FileData: bad raw object ' + JSON.stringify(raw))
}
/** @see File.createHollow */
static createHollow(byteLength, stringLength) {
if (stringLength == null) {
return new HollowBinaryFileData(byteLength)
}
return new HollowStringFileData(stringLength)
}
/** @see File.createLazyFromBlob */
static createLazyFromBlob(blob) {
assert.instance(blob, Blob, 'FileData: bad blob')
if (blob.getStringLength() == null) {
return new BinaryFileData(blob.getHash(), blob.getByteLength())
}
return new LazyStringFileData(blob.getHash(), blob.getStringLength())
}
toRaw() {
throw new Error('FileData: toRaw not implemented')
}
/** @see File#getHash */
getHash() {
return null
}
/** @see File#getContent */
getContent() {
return null
}
/** @see File#isEditable */
isEditable() {
return null
}
/** @see File#getByteLength */
getByteLength() {
return null
}
/** @see File#getStringLength */
getStringLength() {
return null
}
/** @see File#edit */
edit(textOperation) {
throw new Error('edit not implemented for ' + JSON.stringify(this))
}
/**
* @function
* @param {BlobStore} blobStore
* @return {BPromise<FileData>}
* @abstract
* @see FileData#load
*/
toEager(blobStore) {
return BPromise.reject(
new Error('toEager not implemented for ' + JSON.stringify(this))
)
}
/**
* @function
* @param {BlobStore} blobStore
* @return {BPromise<FileData>}
* @abstract
* @see FileData#load
*/
toLazy(blobStore) {
return BPromise.reject(
new Error('toLazy not implemented for ' + JSON.stringify(this))
)
}
/**
* @function
* @param {BlobStore} blobStore
* @return {BPromise<FileData>}
* @abstract
* @see FileData#load
*/
toHollow(blobStore) {
return BPromise.reject(
new Error('toHollow not implemented for ' + JSON.stringify(this))
)
}
/**
* @see File#load
* @param {string} kind
* @param {BlobStore} blobStore
* @return {BPromise<FileData>}
*/
load(kind, blobStore) {
if (kind === 'eager') return this.toEager(blobStore)
if (kind === 'lazy') return this.toLazy(blobStore)
if (kind === 'hollow') return this.toHollow(blobStore)
throw new Error('bad file data load kind: ' + kind)
}
/**
* @see File#store
* @function
* @param {BlobStore} blobStore
* @return {BPromise<Object>} a raw HashFile
* @abstract
*/
store(blobStore) {
return BPromise.reject(
new Error('store not implemented for ' + JSON.stringify(this))
)
}
}
module.exports = FileData
BinaryFileData = require('./binary_file_data')
HashFileData = require('./hash_file_data')
HollowBinaryFileData = require('./hollow_binary_file_data')
HollowStringFileData = require('./hollow_string_file_data')
LazyStringFileData = require('./lazy_string_file_data')
StringFileData = require('./string_file_data')
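
A sketch of the key-based dispatch in FileData.fromRaw; the deep require path is an assumption based on the relative requires above, and the hash is File.EMPTY_FILE_HASH:

const FileData = require('overleaf-editor-core/lib/file_data') // assumed path
const emptyHash = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
FileData.fromRaw({ content: 'x' }).constructor.name                     // 'StringFileData'
FileData.fromRaw({ hash: emptyHash }).constructor.name                  // 'HashFileData'
FileData.fromRaw({ hash: emptyHash, byteLength: 0 }).constructor.name   // 'BinaryFileData'
FileData.fromRaw({ hash: emptyHash, stringLength: 0 }).constructor.name // 'LazyStringFileData'
FileData.fromRaw({ byteLength: 0 }).constructor.name                    // 'HollowBinaryFileData'
FileData.fromRaw({ stringLength: 0 }).constructor.name                  // 'HollowStringFileData'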

View file

@ -0,0 +1,137 @@
'use strict'
const _ = require('lodash')
const assert = require('check-types').assert
const BPromise = require('bluebird')
const Blob = require('../blob')
const FileData = require('./')
const EagerStringFileData = require('./string_file_data')
const TextOperation = require('../operation/text_operation')
class LazyStringFileData extends FileData {
/**
* @constructor
* @param {string} hash
* @param {number} stringLength
* @param {Array.<TextOperation>} [textOperations]
* @see FileData
*/
constructor(hash, stringLength, textOperations) {
super()
assert.match(hash, Blob.HEX_HASH_RX)
assert.greaterOrEqual(stringLength, 0)
assert.maybe.array.of.instance(textOperations, TextOperation)
this.hash = hash
this.stringLength = stringLength
this.textOperations = textOperations || []
}
static fromRaw(raw) {
return new LazyStringFileData(
raw.hash,
raw.stringLength,
raw.textOperations && _.map(raw.textOperations, TextOperation.fromJSON)
)
}
/** @inheritdoc */
toRaw() {
const raw = { hash: this.hash, stringLength: this.stringLength }
if (this.textOperations.length) {
raw.textOperations = _.map(this.textOperations, function (textOperation) {
return textOperation.toJSON()
})
}
return raw
}
/** @inheritdoc */
getHash() {
if (this.textOperations.length) return null
return this.hash
}
/** @inheritdoc */
isEditable() {
return true
}
/**
* For project quota checking, we approximate the byte length by the string
* length. This isn't strictly correct: it underestimates the byte length
* when the content contains multi-byte UTF-8 characters.
*
* @return {number}
*/
getByteLength() {
return this.stringLength
}
/** @inheritdoc */
getStringLength() {
return this.stringLength
}
/**
* Get the cached text operations that are to be applied to this file to get
* from the content with its last known hash to its latest content.
*
* @return {Array.<TextOperation>}
*/
getTextOperations() {
return this.textOperations
}
/** @inheritdoc */
toEager(blobStore) {
return blobStore.getString(this.hash).then(content => {
return new EagerStringFileData(
computeContent(this.textOperations, content)
)
})
}
/** @inheritdoc */
toLazy() {
return BPromise.resolve(this)
}
/** @inheritdoc */
toHollow() {
return BPromise.try(() => FileData.createHollow(null, this.stringLength))
}
/** @inheritdoc */
edit(textOperation) {
this.stringLength = textOperation.applyToLength(this.stringLength)
this.textOperations.push(textOperation)
}
/** @inheritdoc */
store(blobStore) {
if (this.textOperations.length === 0)
return BPromise.resolve({ hash: this.hash })
return blobStore
.getString(this.hash)
.then(content => {
return blobStore.putString(computeContent(this.textOperations, content))
})
.then(blob => {
this.hash = blob.getHash()
this.stringLength = blob.getStringLength()
this.textOperations.length = 0
return { hash: this.hash }
})
}
}
function computeContent(textOperations, initialFile) {
function applyTextOperation(content, textOperation) {
return textOperation.apply(content)
}
return _.reduce(textOperations, applyTextOperation, initialFile)
}
module.exports = LazyStringFileData

View file

@ -0,0 +1,80 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const FileData = require('./')
/**
* @typedef {import("../types").StringFileRawData} StringFileRawData
*/
class StringFileData extends FileData {
/**
* @constructor
* @param {string} content
*/
constructor(content) {
super()
assert.string(content)
this.content = content
}
static fromRaw(raw) {
return new StringFileData(raw.content)
}
/**
* @inheritdoc
* @returns {StringFileRawData}
*/
toRaw() {
return { content: this.content }
}
/** @inheritdoc */
isEditable() {
return true
}
/** @inheritdoc */
getContent() {
return this.content
}
/** @inheritdoc */
getByteLength() {
return Buffer.byteLength(this.content)
}
/** @inheritdoc */
getStringLength() {
return this.content.length
}
/** @inheritdoc */
edit(textOperation) {
this.content = textOperation.apply(this.content)
}
/** @inheritdoc */
toEager() {
return BPromise.resolve(this)
}
/** @inheritdoc */
toHollow() {
return BPromise.try(() =>
FileData.createHollow(this.getByteLength(), this.getStringLength())
)
}
/** @inheritdoc */
store(blobStore) {
return blobStore.putString(this.content).then(function (blob) {
return { hash: blob.getHash() }
})
}
}
module.exports = StringFileData

View file

@ -0,0 +1,317 @@
'use strict'
const BPromise = require('bluebird')
const _ = require('lodash')
const assert = require('check-types').assert
const OError = require('@overleaf/o-error')
const File = require('./file')
const safePathname = require('./safe_pathname')
/**
* A set of {@link File}s. Several properties are enforced on the pathnames:
*
* 1. File names and paths are case sensitive and can differ by case alone. This
* is consistent with most Linux file systems, but it is not consistent with
* Windows or OS X. Ideally, we would be case-preserving and case insensitive,
* like they are. And we used to be, but it caused too many incompatibilities
* with the old system, which was case sensitive. See
* https://github.com/overleaf/overleaf-ot-prototype/blob/
* 19ed046c09f5a4d14fa12b3ea813ce0d977af88a/editor/core/lib/file_map.js
* for an implementation of this map with those properties.
*
* 2. Uniqueness: No two pathnames are the same.
*
* 3. No type conflicts: A pathname cannot refer to both a file and a directory
* within the same snapshot. That is, you can't have pathnames `a` and `a/b` in
* the same file map; {@see FileMap#wouldConflict}.
*
* @param {Object.<String, File>} files
*/
class FileMap {
constructor(files) {
// create bare object for use as Map
// http://ryanmorr.com/true-hash-maps-in-javascript/
this.files = Object.create(null)
_.assign(this.files, files)
checkPathnamesAreUnique(this.files)
checkPathnamesDoNotConflict(this)
}
static fromRaw(raw) {
assert.object(raw, 'bad raw files')
return new FileMap(_.mapValues(raw, File.fromRaw))
}
/**
* Convert to raw object for serialization.
*
* @return {Object}
*/
toRaw() {
function fileToRaw(file) {
return file.toRaw()
}
return _.mapValues(this.files, fileToRaw)
}
/**
* Create the given file.
*
* @param {string} pathname
* @param {File} file
*/
addFile(pathname, file) {
checkPathname(pathname)
assert.object(file, 'bad file')
checkNewPathnameDoesNotConflict(this, pathname)
addFile(this.files, pathname, file)
}
/**
* Remove the given file.
*
* @param {string} pathname
*/
removeFile(pathname) {
checkPathname(pathname)
const key = findPathnameKey(this.files, pathname)
if (!key) {
throw new FileMap.FileNotFoundError(pathname)
}
delete this.files[key]
}
/**
* Move or remove a file. If the origin file does not exist, or if the old
* and new paths are identical, this has no effect.
*
* @param {string} pathname
* @param {string} newPathname if a blank string, {@link FileMap#removeFile}
*/
moveFile(pathname, newPathname) {
if (pathname === newPathname) return
if (newPathname === '') return this.removeFile(pathname)
checkPathname(pathname)
checkPathname(newPathname)
checkNewPathnameDoesNotConflict(this, newPathname, pathname)
const key = findPathnameKey(this.files, pathname)
if (!key) {
throw new FileMap.FileNotFoundError(pathname)
}
const file = this.files[key]
delete this.files[key]
addFile(this.files, newPathname, file)
}
/**
* The number of files in the file map.
*
* @return {number}
*/
countFiles() {
return _.size(this.files)
}
/**
* Get a file by its pathname.
*
* @param {string} pathname
* @return {File | null | undefined}
*/
getFile(pathname) {
const key = findPathnameKey(this.files, pathname)
return key && this.files[key]
}
/**
* Whether the given pathname conflicts with any file in the map.
*
* Paths conflict in type if one path is a strict prefix of the other path. For
* example, 'a/b' conflicts with 'a', because in the former case 'a' is a
* folder, but in the latter case it is a file. Similarly, the pathname 'a/b/c'
* conflicts with 'a' and 'a/b', but it does not conflict with 'a/b/c', 'a/x',
* or 'a/b/x'. (In our case, identical paths don't conflict, because AddFile
* and MoveFile overwrite existing files.)
*
* @param {string} pathname
* @param {string} [ignoredPathname] pretend this pathname does not exist
*/
wouldConflict(pathname, ignoredPathname) {
checkPathname(pathname)
assert.maybe.string(ignoredPathname)
const pathnames = this.getPathnames()
const dirname = pathname + '/'
// Check the filemap to see whether the supplied pathname is a
// parent of any entry, or any entry is a parent of the pathname.
for (let i = 0; i < pathnames.length; i++) {
// First check if pathname is a strict prefix of pathnames[i] (and that
// pathnames[i] is not ignored)
if (
pathnames[i].startsWith(dirname) &&
!pathnamesEqual(pathnames[i], ignoredPathname)
) {
return true
}
// Now make the reverse check, whether pathnames[i] is a strict prefix of
// pathname. To avoid expensive string concatenation on each pathname we
// first perform a partial check with a.startsWith(b), and then do the
// full check for a subsequent '/' if this passes. This saves about 25%
// of the runtime. Again only return a conflict if pathnames[i] is not
// ignored.
if (
pathname.startsWith(pathnames[i]) &&
pathname.length > pathnames[i].length &&
pathname[pathnames[i].length] === '/' &&
!pathnamesEqual(pathnames[i], ignoredPathname)
) {
return true
}
}
// No conflicts - after excluding ignoredPathname, there were no entries
// which were a strict prefix of pathname, and pathname was not a strict
// prefix of any entry.
return false
}
/** @see Snapshot#getFilePathnames */
getPathnames() {
return _.keys(this.files)
}
/**
* Map the files in this map to new values.
* @param {function} iteratee
* @return {Object}
*/
map(iteratee) {
return _.mapValues(this.files, iteratee)
}
/**
* Map the files in this map to new values asynchronously, with an optional
* limit on concurrency.
* @param {function} iteratee like for _.mapValues
* @param {number} [concurrency] as for BPromise.map
* @return {Object}
*/
mapAsync(iteratee, concurrency) {
assert.maybe.number(concurrency, 'bad concurrency')
const pathnames = this.getPathnames()
return BPromise.map(
pathnames,
file => {
return iteratee(this.getFile(file), file, pathnames)
},
{ concurrency: concurrency || 1 }
).then(files => {
return _.zipObject(pathnames, files)
})
}
}
class PathnameError extends OError {}
FileMap.PathnameError = PathnameError
class NonUniquePathnameError extends PathnameError {
constructor(pathnames) {
super('pathnames are not unique: ' + pathnames, { pathnames })
this.pathnames = pathnames
}
}
FileMap.NonUniquePathnameError = NonUniquePathnameError
class BadPathnameError extends PathnameError {
constructor(pathname) {
super(pathname + ' is not a valid pathname', { pathname })
this.pathname = pathname
}
}
FileMap.BadPathnameError = BadPathnameError
class PathnameConflictError extends PathnameError {
constructor(pathname) {
super(`pathname '${pathname}' conflicts with another file`, { pathname })
this.pathname = pathname
}
}
FileMap.PathnameConflictError = PathnameConflictError
class FileNotFoundError extends PathnameError {
constructor(pathname) {
super(`file ${pathname} does not exist`, { pathname })
this.pathname = pathname
}
}
FileMap.FileNotFoundError = FileNotFoundError
function pathnamesEqual(pathname0, pathname1) {
return pathname0 === pathname1
}
function pathnamesAreUnique(files) {
const keys = _.keys(files)
return _.uniqWith(keys, pathnamesEqual).length === keys.length
}
function checkPathnamesAreUnique(files) {
if (pathnamesAreUnique(files)) return
throw new FileMap.NonUniquePathnameError(_.keys(files))
}
function checkPathname(pathname) {
assert.nonEmptyString(pathname, 'bad pathname')
if (safePathname.isClean(pathname)) return
throw new FileMap.BadPathnameError(pathname)
}
function checkNewPathnameDoesNotConflict(fileMap, pathname, ignoredPathname) {
if (fileMap.wouldConflict(pathname, ignoredPathname)) {
throw new FileMap.PathnameConflictError(pathname)
}
}
function checkPathnamesDoNotConflict(fileMap) {
const pathnames = fileMap.getPathnames()
// check pathnames for validity first
pathnames.forEach(checkPathname)
// convert pathnames to candidate directory names
const dirnames = []
for (let i = 0; i < pathnames.length; i++) {
dirnames[i] = pathnames[i] + '/'
}
// sort in lexical order and check if one directory contains another
dirnames.sort()
for (let i = 0; i < dirnames.length - 1; i++) {
if (dirnames[i + 1].startsWith(dirnames[i])) {
// strip the trailing slash to recover the original pathname
const conflictPathname = dirnames[i + 1].slice(0, -1)
throw new FileMap.PathnameConflictError(conflictPathname)
}
}
}
//
// This function is somewhat vestigial: it was used when this map used
// case-insensitive pathname comparison. We could probably simplify some of the
// logic in the callers, but in the hope that we will one day return to
// case-insensitive semantics, we've just left things as-is for now.
//
function findPathnameKey(files, pathname) {
// we can check for the key without worrying about properties
// in the prototype because we are now using a bare object.
if (pathname in files) return pathname
}
function addFile(files, pathname, file) {
const key = findPathnameKey(files, pathname)
if (key) delete files[key]
files[pathname] = file
}
module.exports = FileMap
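
A sketch of the conflict rules described in wouldConflict above, assuming these simple relative paths pass the safePathname check; the pathnames are illustrative:

const { FileMap, File } = require('overleaf-editor-core')
const fileMap = new FileMap({ 'a/b': File.fromString('content') })
fileMap.wouldConflict('a')     // true: 'a' is already a directory
fileMap.wouldConflict('a/b/c') // true: 'a/b' is already a file
fileMap.wouldConflict('a/b')   // false: identical paths overwrite, they do not conflict
fileMap.wouldConflict('a/x')   // false
fileMap.moveFile('a/b', 'c')   // allowed: the origin path is ignored in the conflict check
fileMap.getPathnames()         // ['c']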

View file

@ -0,0 +1,125 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const Change = require('./change')
const Snapshot = require('./snapshot')
/**
* @typedef {import("./types").BlobStore} BlobStore
*/
/**
* @constructor
* @param {Snapshot} snapshot
* @param {Array.<Change>} changes
*
* @classdesc
* A History is a {@link Snapshot} and a sequence of {@link Change}s that can
* be applied to produce a new snapshot.
*/
function History(snapshot, changes) {
assert.instance(snapshot, Snapshot, 'bad snapshot')
assert.maybe.array.of.instance(changes, Change, 'bad changes')
this.snapshot = snapshot
this.changes = changes || []
}
History.fromRaw = function historyFromRaw(raw) {
return new History(
Snapshot.fromRaw(raw.snapshot),
raw.changes.map(Change.fromRaw)
)
}
History.prototype.toRaw = function historyToRaw() {
function changeToRaw(change) {
return change.toRaw()
}
return {
snapshot: this.snapshot.toRaw(),
changes: this.changes.map(changeToRaw),
}
}
History.prototype.getSnapshot = function () {
return this.snapshot
}
History.prototype.getChanges = function () {
return this.changes
}
History.prototype.countChanges = function historyCountChanges() {
return this.changes.length
}
/**
* Add changes to this history.
*
* @param {Array.<Change>} changes
*/
History.prototype.pushChanges = function historyPushChanges(changes) {
this.changes.push.apply(this.changes, changes)
}
/**
* If this History references blob hashes, either in the Snapshot or the
* Changes, add them to the given set.
*
* @param {Set.<String>} blobHashes
*/
History.prototype.findBlobHashes = function historyFindBlobHashes(blobHashes) {
function findChangeBlobHashes(change) {
change.findBlobHashes(blobHashes)
}
this.snapshot.findBlobHashes(blobHashes)
this.changes.forEach(findChangeBlobHashes)
}
/**
* If this History contains any File objects, load them.
*
* @param {string} kind see {File#load}
* @param {BlobStore} blobStore
* @return {Promise}
*/
History.prototype.loadFiles = function historyLoadFiles(kind, blobStore) {
function loadChangeFiles(change) {
return change.loadFiles(kind, blobStore)
}
return BPromise.join(
this.snapshot.loadFiles(kind, blobStore),
BPromise.each(this.changes, loadChangeFiles)
)
}
/**
* Return a version of this history that is suitable for long term storage.
* This requires that we store the content of file objects in the provided
* blobStore.
*
* @param {BlobStore} blobStore
* @param {number} [concurrency] applies separately to files, changes and
* operations
* @return {Promise.<Object>}
*/
History.prototype.store = function historyStoreFunc(blobStore, concurrency) {
assert.maybe.number(concurrency, 'bad concurrency')
function storeChange(change) {
return change.store(blobStore, concurrency)
}
return BPromise.join(
this.snapshot.store(blobStore, concurrency),
BPromise.map(this.changes, storeChange, { concurrency: concurrency || 1 })
).then(([rawSnapshot, rawChanges]) => {
return {
snapshot: rawSnapshot,
changes: rawChanges,
}
})
}
module.exports = History

View file

@ -0,0 +1,82 @@
'use strict'
const assert = require('check-types').assert
/**
* @constructor
* @param {string} text
* @param {number | null} authorId
* @param {Date} timestamp
* @param {number} version
* @classdesc
* A user-configurable label that can be attached to a specific change. Labels
* are not versioned, and they are not stored alongside the Changes in Chunks.
* They are instead intended to provide external markers into the history of the
* project.
*/
function Label(text, authorId, timestamp, version) {
assert.string(text, 'bad text')
assert.maybe.integer(authorId, 'bad author id')
assert.date(timestamp, 'bad timestamp')
assert.integer(version, 'bad version')
this.text = text
this.authorId = authorId
this.timestamp = timestamp
this.version = version
}
/**
* Create a Label from its raw form.
*
* @param {Object} raw
* @return {Label}
*/
Label.fromRaw = function labelFromRaw(raw) {
return new Label(raw.text, raw.authorId, new Date(raw.timestamp), raw.version)
}
/**
* Convert the Label to raw form for transmission.
*
* @return {Object}
*/
Label.prototype.toRaw = function labelToRaw() {
return {
text: this.text,
authorId: this.authorId,
timestamp: this.timestamp.toISOString(),
version: this.version,
}
}
/**
* @return {string}
*/
Label.prototype.getText = function () {
return this.text
}
/**
* The ID of the author, if any. Note that we now require all saved versions to
* have an author, but this was not always the case, so we have to allow nulls
* here for historical reasons.
*
* @return {number | null | undefined}
*/
Label.prototype.getAuthorId = function () {
return this.authorId
}
/**
* @return {Date}
*/
Label.prototype.getTimestamp = function () {
return this.timestamp
}
/**
* @return {number | undefined}
*/
Label.prototype.getVersion = function () {
return this.version
}
module.exports = Label
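
A round-trip sketch with illustrative values; authorId may be null for older labels, as noted above:

const { Label } = require('overleaf-editor-core')
const raw = {
  text: 'Submitted to journal',
  authorId: null,
  timestamp: '2023-01-13T12:42:29.000Z',
  version: 3,
}
const label = Label.fromRaw(raw)
label.getVersion()      // 3
label.toRaw().timestamp // '2023-01-13T12:42:29.000Z': back to an ISO string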

View file

@ -0,0 +1,81 @@
'use strict'
const assert = require('check-types').assert
const File = require('../file')
const Operation = require('./')
/**
* @classdesc
* Adds a new file to a project.
*/
class AddFileOperation extends Operation {
/**
* @constructor
* @param {string} pathname
* @param {File} file
*/
constructor(pathname, file) {
super()
assert.string(pathname, 'bad pathname')
assert.object(file, 'bad file')
this.pathname = pathname
this.file = file
}
/**
* @return {String}
*/
getPathname() {
return this.pathname
}
/**
* Deserialize an AddFileOperation.
* @param {Object} raw
* @return {AddFileOperation}
*/
static fromRaw(raw) {
return new AddFileOperation(raw.pathname, File.fromRaw(raw.file))
}
/**
* @inheritdoc
*/
toRaw() {
return { pathname: this.pathname, file: this.file.toRaw() }
}
/**
* @inheritdoc
*/
getFile() {
return this.file
}
/** @inheritdoc */
findBlobHashes(blobHashes) {
const hash = this.file.getHash()
if (hash) blobHashes.add(hash)
}
/** @inheritdoc */
loadFiles(kind, blobStore) {
return this.file.load(kind, blobStore)
}
store(blobStore) {
return this.file.store(blobStore).then(rawFile => {
return { pathname: this.pathname, file: rawFile }
})
}
/**
* @inheritdoc
*/
applyTo(snapshot) {
snapshot.addFile(this.pathname, this.file.clone())
}
}
module.exports = AddFileOperation

View file

@ -0,0 +1,93 @@
'use strict'
const Operation = require('./')
const TextOperation = require('./text_operation')
/**
* @classdesc
* Edit a file in place. It is a wrapper around a single TextOperation.
*/
class EditFileOperation extends Operation {
/**
* @constructor
* @param {string} pathname
* @param {TextOperation} textOperation
*/
constructor(pathname, textOperation) {
super()
this.pathname = pathname
this.textOperation = textOperation
}
/**
* @inheritdoc
*/
toRaw() {
return {
pathname: this.pathname,
textOperation: this.textOperation.toJSON(),
}
}
/**
* Deserialize an EditFileOperation.
*
* @param {Object} raw
* @return {EditFileOperation}
*/
static fromRaw(raw) {
return new EditFileOperation(
raw.pathname,
TextOperation.fromJSON(raw.textOperation)
)
}
getPathname() {
return this.pathname
}
getTextOperation() {
return this.textOperation
}
/**
* @inheritdoc
*/
applyTo(snapshot) {
snapshot.editFile(this.pathname, this.textOperation)
}
/**
* @inheritdoc
*/
canBeComposedWithForUndo(other) {
return (
this.canBeComposedWith(other) &&
this.textOperation.canBeComposedWithForUndo(other.textOperation)
)
}
/**
* @inheritdoc
*/
canBeComposedWith(other) {
// Ensure that other operation is an edit file operation
if (!(other instanceof EditFileOperation)) return false
// Ensure that both operations are editing the same file
if (this.getPathname() !== other.getPathname()) return false
return this.textOperation.canBeComposedWith(other.textOperation)
}
/**
* @inheritdoc
*/
compose(other) {
return new EditFileOperation(
this.pathname,
this.textOperation.compose(other.textOperation)
)
}
}
module.exports = EditFileOperation

View file

@ -0,0 +1,463 @@
'use strict'
const _ = require('lodash')
const assert = require('check-types').assert
const BPromise = require('bluebird')
const TextOperation = require('./text_operation')
// Dependencies are loaded at the bottom of the file to mitigate circular
// dependency
let NoOperation = null
let AddFileOperation = null
let MoveFileOperation = null
let EditFileOperation = null
let SetFileMetadataOperation = null
/**
* @typedef {import("../types").BlobStore} BlobStore
* @typedef {import("../snapshot")} Snapshot
*/
/**
* @classdesc
* An `Operation` changes a `Snapshot` when it is applied. See the
* {@tutorial OT} tutorial for background.
*/
class Operation {
/**
* Deserialize an Operation.
*
* @param {Object} raw
* @return {Operation} one of the subclasses
*/
static fromRaw(raw) {
if (Object.prototype.hasOwnProperty.call(raw, 'file')) {
return AddFileOperation.fromRaw(raw)
}
if (Object.prototype.hasOwnProperty.call(raw, 'textOperation')) {
return EditFileOperation.fromRaw(raw)
}
if (Object.prototype.hasOwnProperty.call(raw, 'newPathname')) {
return new MoveFileOperation(raw.pathname, raw.newPathname)
}
if (Object.prototype.hasOwnProperty.call(raw, 'metadata')) {
return new SetFileMetadataOperation(raw.pathname, raw.metadata)
}
if (_.isEmpty(raw)) {
return new NoOperation()
}
throw new Error('invalid raw operation ' + JSON.stringify(raw))
}
/**
* Serialize an Operation.
*
* @return {Object}
*/
toRaw() {
return {}
}
/**
* Whether this operation does nothing when applied.
*
* @return {Boolean}
*/
isNoOp() {
return false
}
/**
* If this Operation references blob hashes, add them to the given Set.
*
* @param {Set.<String>} blobHashes
*/
findBlobHashes(blobHashes) {}
/**
* If this operation references any files, load the files.
*
* @param {string} kind see {File#load}
* @param {BlobStore} blobStore
* @return {Promise}
*/
loadFiles(kind, blobStore) {
return BPromise.resolve()
}
/**
* Return a version of this operation that is suitable for long term storage.
* In most cases, we just need to convert the operation to raw form, but if
* the operation involves File objects, we may need to store their content.
*
* @param {BlobStore} blobStore
* @return {Promise.<Object>}
*/
store(blobStore) {
return BPromise.try(() => this.toRaw())
}
/**
* Apply this Operation to a snapshot.
*
* The snapshot is modified in place.
*
* @param {Snapshot} snapshot
*/
applyTo(snapshot) {
assert.object(snapshot, 'bad snapshot')
}
/**
* Whether this operation can be composed with another operation to produce a
* single operation of the same type as this one, while keeping the composed
* operation small and logical enough to be used in the undo stack.
*
* @param {Operation} other
* @return {Boolean}
*/
canBeComposedWithForUndo(other) {
return false
}
/**
* Whether this operation can be composed with another operation to produce a
* single operation of the same type as this one.
*
* TODO Moves can be composed. For example, if you rename a to b and then decide
* shortly afterwards that you actually want to call it c, we could compose the
* two to get a -> c. Edits can also be composed --- see rules in TextOperation.
* We also need to consider the Change --- we will need to consider both time
* and author(s) when composing changes. I guess that AddFile can also be
* composed in some cases --- if you upload a file and then decide it was the
* wrong one and upload a new one, we could drop the one in the middle, but
* that seems like a pretty rare case.
*
* @param {Operation} other
* @return {Boolean}
*/
canBeComposedWith(other) {
return false
}
/**
* Compose this operation with another operation to produce a single operation
* of the same type as this one.
*
* @param {Operation} other
* @return {Operation}
*/
compose(other) {
throw new Error('not implemented')
}
/**
* Transform takes two operations A and B that happened concurrently and
* produces two operations A' and B' (in an array) such that
* `apply(apply(S, A), B') = apply(apply(S, B), A')`.
*
* That is, if one client applies A and then B', they get the same result as
* another client who applies B and then A'.
*
* @param {Operation} a
* @param {Operation} b
* @return {Operation[]} operations `[a', b']`
*/
static transform(a, b) {
if (a.isNoOp() || b.isNoOp()) return [a, b]
function transpose(transformer) {
return transformer(b, a).reverse()
}
const bIsAddFile = b instanceof AddFileOperation
const bIsEditFile = b instanceof EditFileOperation
const bIsMoveFile = b instanceof MoveFileOperation
const bIsSetFileMetadata = b instanceof SetFileMetadataOperation
if (a instanceof AddFileOperation) {
if (bIsAddFile) return transformAddFileAddFile(a, b)
if (bIsMoveFile) return transformAddFileMoveFile(a, b)
if (bIsEditFile) return transformAddFileEditFile(a, b)
if (bIsSetFileMetadata) return transformAddFileSetFileMetadata(a, b)
throw new Error('bad op b')
}
if (a instanceof MoveFileOperation) {
if (bIsAddFile) return transpose(transformAddFileMoveFile)
if (bIsMoveFile) return transformMoveFileMoveFile(a, b)
if (bIsEditFile) return transformMoveFileEditFile(a, b)
if (bIsSetFileMetadata) return transformMoveFileSetFileMetadata(a, b)
throw new Error('bad op b')
}
if (a instanceof EditFileOperation) {
if (bIsAddFile) return transpose(transformAddFileEditFile)
if (bIsMoveFile) return transpose(transformMoveFileEditFile)
if (bIsEditFile) return transformEditFileEditFile(a, b)
if (bIsSetFileMetadata) return transformEditFileSetFileMetadata(a, b)
throw new Error('bad op b')
}
if (a instanceof SetFileMetadataOperation) {
if (bIsAddFile) return transpose(transformAddFileSetFileMetadata)
if (bIsMoveFile) return transpose(transformMoveFileSetFileMetadata)
if (bIsEditFile) return transpose(transformEditFileSetFileMetadata)
if (bIsSetFileMetadata) return transformSetFileMetadatas(a, b)
throw new Error('bad op b')
}
throw new Error('bad op a')
}
/**
* Transform each operation in `a` by each operation in `b` and save the primed
* operations in place.
*
* @param {Array.<Operation>} as - modified in place
* @param {Array.<Operation>} bs - modified in place
*/
static transformMultiple(as, bs) {
for (let i = 0; i < as.length; ++i) {
for (let j = 0; j < bs.length; ++j) {
const primes = Operation.transform(as[i], bs[j])
as[i] = primes[0]
bs[j] = primes[1]
}
}
}
static addFile(pathname, file) {
return new AddFileOperation(pathname, file)
}
static editFile(pathname, textOperation) {
return new EditFileOperation(pathname, textOperation)
}
static moveFile(pathname, newPathname) {
return new MoveFileOperation(pathname, newPathname)
}
static removeFile(pathname) {
return new MoveFileOperation(pathname, '')
}
static setFileMetadata(pathname, metadata) {
return new SetFileMetadataOperation(pathname, metadata)
}
}
//
// Transform
//
// The way to read these transform functions is that
// 1. return_value[0] is the op to be applied after arguments[1], and
// 2. return_value[1] is the op to be applied after arguments[0],
// in order to arrive at the same project state.
//
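// Example (illustrative, not part of this module): transforming a concurrent
// rename and edit of the same file. The pathnames and the text operation are
// made up for the example.
//
//   const a = Operation.moveFile('a.tex', 'b.tex')
//   const b = Operation.editFile('a.tex', TextOperation.fromJSON([1, 'x']))
//   const [aPrime, bPrime] = Operation.transform(a, b)
//   // aPrime still moves a.tex -> b.tex; bPrime edits b.tex instead of a.tex,
//   // so applying a then bPrime yields the same snapshot as b then aPrime.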
function transformAddFileAddFile(add1, add2) {
if (add1.getPathname() === add2.getPathname()) {
return [Operation.NO_OP, add2] // add2 wins
}
return [add1, add2]
}
function transformAddFileMoveFile(add, move) {
function relocateAddFile() {
return new AddFileOperation(move.getNewPathname(), add.getFile().clone())
}
if (add.getPathname() === move.getPathname()) {
if (move.isRemoveFile()) {
return [add, Operation.NO_OP]
}
return [
relocateAddFile(),
new MoveFileOperation(add.getPathname(), move.getNewPathname()),
]
}
if (add.getPathname() === move.getNewPathname()) {
return [relocateAddFile(), new MoveFileOperation(move.getPathname(), '')]
}
return [add, move]
}
function transformAddFileEditFile(add, edit) {
if (add.getPathname() === edit.getPathname()) {
return [add, Operation.NO_OP] // the add wins
}
return [add, edit]
}
function transformAddFileSetFileMetadata(add, set) {
if (add.getPathname() === set.getPathname()) {
const newFile = add.getFile().clone()
newFile.setMetadata(set.getMetadata())
return [new AddFileOperation(add.getPathname(), newFile), set]
}
return [add, set]
}
//
// This is one of the trickier ones. There are 15 possible equivalence
// relationships between our four variables:
//
// path1, newPath1, path2, newPath2 --- "same move" (all equal)
//
// path1, newPath1, path2 | newPath2 --- "no-ops" (1)
// path1, newPath1, newPath2 | path2 --- "no-ops" (1)
// path1, path2, newPath2 | newPath1 --- "no-ops" (2)
// newPath1, path2, newPath2 | path1 --- "no-ops" (2)
//
// path1, newPath1 | path2, newPath2 --- "no-ops" (1 and 2)
// path1, path2 | newPath1, newPath2 --- "same move"
// path1, newPath2 | newPath1, path2 --- "opposite moves"
//
// path1, newPath1 | path2 | newPath2 --- "no-ops" (1)
// path1, path2 | newPath1 | newPath2 --- "divergent moves"
// path1, newPath2 | newPath1 | path2 --- "transitive move"
// newPath1, path2 | path1 | newPath2 --- "transitive move"
// newPath1, newPath2 | path1 | path2 --- "convergent move"
// path2, newPath2 | path1 | newPath1 --- "no-ops" (2)
//
// path1 | newPath1 | path2 | newPath2 --- "no conflict"
//
function transformMoveFileMoveFile(move1, move2) {
const path1 = move1.getPathname()
const path2 = move2.getPathname()
const newPath1 = move1.getNewPathname()
const newPath2 = move2.getNewPathname()
// the same move
if (path1 === path2 && newPath1 === newPath2) {
return [Operation.NO_OP, Operation.NO_OP]
}
// no-ops
if (path1 === newPath1 && path2 === newPath2) {
return [Operation.NO_OP, Operation.NO_OP]
}
if (path1 === newPath1) {
return [Operation.NO_OP, move2]
}
if (path2 === newPath2) {
return [move1, Operation.NO_OP]
}
// opposite moves (foo -> bar, bar -> foo)
if (path1 === newPath2 && path2 === newPath1) {
// We can't handle this very well: if we wanted move2 (say) to win, move2'
// would have to be addFile(foo) with the content of bar, but we don't have
// the content of bar available here. So, we just destroy both files.
return [Operation.removeFile(path1), Operation.removeFile(path2)]
}
// divergent moves (foo -> bar, foo -> baz); convention: move2 wins
if (path1 === path2 && newPath1 !== newPath2) {
return [Operation.NO_OP, Operation.moveFile(newPath1, newPath2)]
}
// convergent move (foo -> baz, bar -> baz); convention: move2 wins
if (newPath1 === newPath2 && path1 !== path2) {
return [Operation.removeFile(path1), move2]
}
// transitive move:
// 1: foo -> baz, 2: bar -> foo (result: bar -> baz) or
// 1: foo -> bar, 2: bar -> baz (result: foo -> baz)
if (path1 === newPath2 && newPath1 !== path2) {
return [
Operation.moveFile(newPath2, newPath1),
Operation.moveFile(path2, newPath1),
]
}
if (newPath1 === path2 && path1 !== newPath2) {
return [
Operation.moveFile(path1, newPath2),
Operation.moveFile(newPath1, newPath2),
]
}
// no conflict
return [move1, move2]
}
function transformMoveFileEditFile(move, edit) {
if (move.getPathname() === edit.getPathname()) {
if (move.isRemoveFile()) {
// let the remove win
return [move, Operation.NO_OP]
}
return [
move,
Operation.editFile(move.getNewPathname(), edit.getTextOperation()),
]
}
if (move.getNewPathname() === edit.getPathname()) {
// let the move win
return [move, Operation.NO_OP]
}
return [move, edit]
}
function transformMoveFileSetFileMetadata(move, set) {
if (move.getPathname() === set.getPathname()) {
return [
move,
Operation.setFileMetadata(move.getNewPathname(), set.getMetadata()),
]
}
// A: mv foo -> bar
// B: set bar.x
//
// A': mv foo -> bar
// B': nothing
if (move.getNewPathname() === set.getPathname()) {
return [move, Operation.NO_OP] // let the move win
}
return [move, set]
}
function transformEditFileEditFile(edit1, edit2) {
if (edit1.getPathname() === edit2.getPathname()) {
const primeTextOps = TextOperation.transform(
edit1.getTextOperation(),
edit2.getTextOperation()
)
return [
Operation.editFile(edit1.getPathname(), primeTextOps[0]),
Operation.editFile(edit2.getPathname(), primeTextOps[1]),
]
}
return [edit1, edit2]
}
function transformEditFileSetFileMetadata(edit, set) {
// There is no conflict.
return [edit, set]
}
function transformSetFileMetadatas(set1, set2) {
if (set1.getPathname() === set2.getPathname()) {
return [Operation.NO_OP, set2] // set2 wins
}
return [set1, set2]
}
module.exports = Operation
// Work around circular import
NoOperation = require('./no_operation')
AddFileOperation = require('./add_file_operation')
MoveFileOperation = require('./move_file_operation')
EditFileOperation = require('./edit_file_operation')
SetFileMetadataOperation = require('./set_file_metadata_operation')
Operation.NO_OP = new NoOperation()

View file

@ -0,0 +1,54 @@
'use strict'
const Operation = require('./')
/**
* @classdesc
* Moves or removes a file from a project.
*/
class MoveFileOperation extends Operation {
/**
* @param {string} pathname
* @param {string} newPathname
*/
constructor(pathname, newPathname) {
super()
this.pathname = pathname
this.newPathname = newPathname
}
/**
* @inheritdoc
*/
toRaw() {
return {
pathname: this.pathname,
newPathname: this.newPathname,
}
}
getPathname() {
return this.pathname
}
getNewPathname() {
return this.newPathname
}
/**
* Whether this operation is a MoveFile operation that deletes the file.
*
* @return {boolean}
*/
isRemoveFile() {
return this.getNewPathname() === ''
}
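// Example (illustrative, not part of this module): removal is encoded as a
// move to the empty pathname.
//
//   new MoveFileOperation('a.tex', 'b.tex').isRemoveFile() // false
//   new MoveFileOperation('a.tex', '').isRemoveFile() // true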
/**
* @inheritdoc
*/
applyTo(snapshot) {
snapshot.moveFile(this.getPathname(), this.getNewPathname())
}
}
module.exports = MoveFileOperation

View file

@ -0,0 +1,21 @@
'use strict'
const Operation = require('./')
/**
* @classdesc
* An explicit no-operation.
*
* There are several no-ops, such as moving a file to itself, but it's useful
* to have a generic no-op as well.
*/
class NoOperation extends Operation {
/**
* @inheritdoc
*/
isNoOp() {
return true
}
}
module.exports = NoOperation

View file

@ -0,0 +1,55 @@
'use strict'
const _ = require('lodash')
const assert = require('check-types').assert
const Operation = require('./')
/**
* @classdesc
* Sets metadata on a file in a project.
*/
class SetFileMetadataOperation extends Operation {
/**
* @constructor
* @param {string} pathname
* @param {Object} metadata
*/
constructor(pathname, metadata) {
super()
assert.string(pathname, 'SetFileMetadataOperation: bad pathname')
assert.object(metadata, 'SetFileMetadataOperation: bad metadata')
this.pathname = pathname
this.metadata = metadata
}
/**
* @inheritdoc
*/
toRaw() {
return {
pathname: this.pathname,
metadata: _.cloneDeep(this.metadata),
}
}
getPathname() {
return this.pathname
}
getMetadata() {
return this.metadata
}
/**
* @inheritdoc
*/
applyTo(snapshot) {
const file = snapshot.getFile(this.pathname)
if (!file) return
file.setMetadata(this.metadata)
}
}
module.exports = SetFileMetadataOperation

View file

@ -0,0 +1,682 @@
/**
* The text operation from OT.js with some minor cosmetic changes.
*
* Specifically, this is based on
* https://github.com/Operational-Transformation/ot.js/
* blob/298825f58fb51fefb352e7df5ddbc668f4d5646f/lib/text-operation.js
* from 18 Mar 2013.
*/
'use strict'
const containsNonBmpChars = require('../util').containsNonBmpChars
const OError = require('@overleaf/o-error')
/**
* Create an empty text operation.
*
* @class
*/
function TextOperation() {
// When an operation is applied to an input string, you can think of this as
// if an imaginary cursor runs over the entire string and skips over some
// parts, removes some parts and inserts characters at some positions. These
// actions (skip/remove/insert) are stored as an array in the "ops" property.
this.ops = []
// An operation's baseLength is the length of every string the operation
// can be applied to.
this.baseLength = 0
// The targetLength is the length of every string that results from applying
// the operation on a valid input string.
this.targetLength = 0
}
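// Example (illustrative, not part of this module): building an operation with
// the retain/insert/remove builders and applying it to a made-up string.
//
//   const op = new TextOperation().retain(6).insert('brave ').retain(5)
//   op.apply('hello world') // => 'hello brave world'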
/**
* Length of the longest file that we'll attempt to edit, in characters.
*
* @type {number}
*/
TextOperation.MAX_STRING_LENGTH = 2 * Math.pow(1024, 2)
TextOperation.prototype.equals = function (other) {
if (this.baseLength !== other.baseLength) {
return false
}
if (this.targetLength !== other.targetLength) {
return false
}
if (this.ops.length !== other.ops.length) {
return false
}
for (let i = 0; i < this.ops.length; i++) {
if (this.ops[i] !== other.ops[i]) {
return false
}
}
return true
}
class UnprocessableError extends OError {}
TextOperation.UnprocessableError = UnprocessableError
class ApplyError extends UnprocessableError {
constructor(message, operation, operand) {
super(message, { operation, operand })
this.operation = operation
this.operand = operand
}
}
TextOperation.ApplyError = ApplyError
class InvalidInsertionError extends UnprocessableError {
constructor(str, operation) {
super('inserted text contains non BMP characters', { str, operation })
this.str = str
this.operation = operation
}
}
TextOperation.InvalidInsertionError = InvalidInsertionError
class TooLongError extends UnprocessableError {
constructor(operation, resultLength) {
super(`resulting string would be too long: ${resultLength}`, {
operation,
resultLength,
})
this.operation = operation
this.resultLength = resultLength
}
}
TextOperation.TooLongError = TooLongError
// Operation are essentially lists of ops. There are three types of ops:
//
// * Retain ops: Advance the cursor position by a given number of characters.
// Represented by positive ints.
// * Insert ops: Insert a given string at the current cursor position.
// Represented by strings.
// * Remove ops: Remove the next n characters. Represented by negative ints.
const isRetain = (TextOperation.isRetain = function (op) {
return typeof op === 'number' && op > 0
})
const isInsert = (TextOperation.isInsert = function (op) {
return typeof op === 'string'
})
const isRemove = (TextOperation.isRemove = function (op) {
return typeof op === 'number' && op < 0
})
// After an operation is constructed, the user of the library can specify the
// actions of an operation (skip/insert/remove) with these three builder
// methods. They all return the operation for convenient chaining.
// Skip over a given number of characters.
TextOperation.prototype.retain = function (n) {
if (typeof n !== 'number') {
throw new Error('retain expects an integer')
}
if (n === 0) {
return this
}
this.baseLength += n
this.targetLength += n
if (isRetain(this.ops[this.ops.length - 1])) {
// The last op is a retain op => we can merge them into one op.
this.ops[this.ops.length - 1] += n
} else {
// Create a new op.
this.ops.push(n)
}
return this
}
// Insert a string at the current position.
TextOperation.prototype.insert = function (str) {
if (typeof str !== 'string') {
throw new Error('insert expects a string')
}
if (containsNonBmpChars(str)) {
throw new TextOperation.InvalidInsertionError(str)
}
if (str === '') {
return this
}
this.targetLength += str.length
const ops = this.ops
if (isInsert(ops[ops.length - 1])) {
// Merge insert op.
ops[ops.length - 1] += str
} else if (isRemove(ops[ops.length - 1])) {
// It doesn't matter when an operation is applied whether the operation
// is remove(3), insert("something") or insert("something"), remove(3).
// Here we enforce that in this case, the insert op always comes first.
// This makes all operations that have the same effect when applied to
// a document of the right length equal in respect to the `equals` method.
if (isInsert(ops[ops.length - 2])) {
ops[ops.length - 2] += str
} else {
ops[ops.length] = ops[ops.length - 1]
ops[ops.length - 2] = str
}
} else {
ops.push(str)
}
return this
}
// Remove a string at the current position.
TextOperation.prototype.remove = function (n) {
if (typeof n === 'string') {
n = n.length
}
if (typeof n !== 'number') {
throw new Error('remove expects an integer or a string')
}
if (n === 0) {
return this
}
if (n > 0) {
n = -n
}
this.baseLength -= n
if (isRemove(this.ops[this.ops.length - 1])) {
this.ops[this.ops.length - 1] += n
} else {
this.ops.push(n)
}
return this
}
// Tests whether this operation has no effect.
TextOperation.prototype.isNoop = function () {
return (
this.ops.length === 0 || (this.ops.length === 1 && isRetain(this.ops[0]))
)
}
// Pretty printing.
TextOperation.prototype.toString = function () {
return this.ops
.map(op => {
if (isRetain(op)) {
return 'retain ' + op
} else if (isInsert(op)) {
return "insert '" + op + "'"
} else {
return 'remove ' + -op
}
})
.join(', ')
}
// Converts operation into a JSON value.
TextOperation.prototype.toJSON = function () {
return this.ops
}
// Converts a plain JS object into an operation and validates it.
TextOperation.fromJSON = function (ops) {
const o = new TextOperation()
for (let i = 0, l = ops.length; i < l; i++) {
const op = ops[i]
if (isRetain(op)) {
o.retain(op)
} else if (isInsert(op)) {
o.insert(op)
} else if (isRemove(op)) {
o.remove(op)
} else {
throw new Error(
'unknown operation: ' +
JSON.stringify(op) +
' in ' +
JSON.stringify(ops)
)
}
}
return o
}
// Apply an operation to a string, returning a new string. Throws an error if
// there's a mismatch between the input string and the operation.
TextOperation.prototype.apply = function (str) {
const operation = this
if (containsNonBmpChars(str)) {
throw new TextOperation.ApplyError(
'The string contains non BMP characters.',
operation,
str
)
}
if (str.length !== operation.baseLength) {
throw new TextOperation.ApplyError(
"The operation's base length must be equal to the string's length.",
operation,
str
)
}
// Build up the result string directly by concatenation (which is actually
// faster than joining arrays because it is optimised in v8).
let result = ''
let strIndex = 0
const ops = this.ops
for (let i = 0, l = ops.length; i < l; i++) {
const op = ops[i]
if (isRetain(op)) {
if (strIndex + op > str.length) {
throw new TextOperation.ApplyError(
"Operation can't retain more chars than are left in the string.",
operation,
str
)
}
// Copy skipped part of the old string.
result += str.slice(strIndex, strIndex + op)
strIndex += op
} else if (isInsert(op)) {
if (containsNonBmpChars(op)) {
throw new TextOperation.InvalidInsertionError(str, operation)
}
// Insert string.
result += op
} else {
// remove op
strIndex -= op
}
}
if (strIndex !== str.length) {
throw new TextOperation.ApplyError(
"The operation didn't operate on the whole string.",
operation,
str
)
}
if (result.length > TextOperation.MAX_STRING_LENGTH) {
throw new TextOperation.TooLongError(operation, result.length)
}
return result
}
/**
* Determine the effect of this operation on the length of the text.
*
* NB: This is an Overleaf addition to the original TextOperation.
*
* @param {number} length of the original string; non-negative
* @return {number} length of the new string; non-negative
*/
TextOperation.prototype.applyToLength = function (length) {
const operation = this
if (length !== operation.baseLength) {
throw new TextOperation.ApplyError(
"The operation's base length must be equal to the string's length.",
operation,
length
)
}
let newLength = 0
let strIndex = 0
const ops = this.ops
for (let i = 0, l = ops.length; i < l; i++) {
const op = ops[i]
if (isRetain(op)) {
if (strIndex + op > length) {
throw new TextOperation.ApplyError(
"Operation can't retain more chars than are left in the string.",
operation,
length
)
}
// Copy skipped part of the old string.
newLength += op
strIndex += op
} else if (isInsert(op)) {
// Insert string.
newLength += op.length
} else {
// remove op
strIndex -= op
}
}
if (strIndex !== length) {
throw new TextOperation.ApplyError(
"The operation didn't operate on the whole string.",
operation,
length
)
}
if (newLength > TextOperation.MAX_STRING_LENGTH) {
throw new TextOperation.TooLongError(operation, newLength)
}
return newLength
}
// Computes the inverse of an operation. The inverse of an operation is the
// operation that reverts the effects of the operation, e.g. when you have an
// operation 'insert("hello "); skip(6);' then the inverse is 'remove("hello ");
// skip(6);'. The inverse should be used for implementing undo.
TextOperation.prototype.invert = function (str) {
let strIndex = 0
const inverse = new TextOperation()
const ops = this.ops
for (let i = 0, l = ops.length; i < l; i++) {
const op = ops[i]
if (isRetain(op)) {
inverse.retain(op)
strIndex += op
} else if (isInsert(op)) {
inverse.remove(op.length)
} else {
// remove op
inverse.insert(str.slice(strIndex, strIndex - op))
strIndex -= op
}
}
return inverse
}
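// Example (illustrative, not part of this module): the inverse undoes an
// operation when applied to its output.
//
//   const op = new TextOperation().retain(6).insert('brave ').retain(5)
//   const inverse = op.invert('hello world')
//   inverse.apply(op.apply('hello world')) // => 'hello world'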
// When you use ctrl-z to undo your latest changes, you expect the program not
// to undo every single keystroke but to undo your last sentence you wrote at
// a stretch or the deletion you did by holding the backspace key down. This
// can be implemented by composing operations on the undo stack. This
// method can help decide whether two operations should be composed. It
// returns true if the operations are consecutive insert operations or both
// operations delete text at the same position. You may want to include other
// factors like the time since the last change in your decision.
TextOperation.prototype.canBeComposedWithForUndo = function (other) {
if (this.isNoop() || other.isNoop()) {
return true
}
const startA = getStartIndex(this)
const startB = getStartIndex(other)
const simpleA = getSimpleOp(this)
const simpleB = getSimpleOp(other)
if (!simpleA || !simpleB) {
return false
}
if (isInsert(simpleA) && isInsert(simpleB)) {
return startA + simpleA.length === startB
}
if (isRemove(simpleA) && isRemove(simpleB)) {
// there are two possibilities to delete: with backspace and with the
// delete key.
return startB - simpleB === startA || startA === startB
}
return false
}
/**
* Whether this operation can be composed with another operation that follows
* it, i.e. whether this operation's target length equals the other's base
* length.
*
* @param {TextOperation} other
* @return {boolean}
*/
TextOperation.prototype.canBeComposedWith = function (other) {
return this.targetLength === other.baseLength
}
// Compose merges two consecutive operations into one operation, that
// preserves the changes of both. Or, in other words, for each input string S
// and a pair of consecutive operations A and B,
// apply(apply(S, A), B) = apply(S, compose(A, B)) must hold.
TextOperation.prototype.compose = function (operation2) {
const operation1 = this
if (operation1.targetLength !== operation2.baseLength) {
throw new Error(
'The base length of the second operation has to be the ' +
'target length of the first operation'
)
}
const operation = new TextOperation() // the combined operation
const ops1 = operation1.ops
const ops2 = operation2.ops // for fast access
let i1 = 0
let i2 = 0 // current index into ops1 respectively ops2
let op1 = ops1[i1++]
let op2 = ops2[i2++] // current ops
for (;;) {
// Dispatch on the type of op1 and op2
if (typeof op1 === 'undefined' && typeof op2 === 'undefined') {
// end condition: both ops1 and ops2 have been processed
break
}
if (isRemove(op1)) {
operation.remove(op1)
op1 = ops1[i1++]
continue
}
if (isInsert(op2)) {
operation.insert(op2)
op2 = ops2[i2++]
continue
}
if (typeof op1 === 'undefined') {
throw new Error(
'Cannot compose operations: first operation is too short.'
)
}
if (typeof op2 === 'undefined') {
throw new Error('Cannot compose operations: first operation is too long.')
}
if (isRetain(op1) && isRetain(op2)) {
if (op1 > op2) {
operation.retain(op2)
op1 = op1 - op2
op2 = ops2[i2++]
} else if (op1 === op2) {
operation.retain(op1)
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
operation.retain(op1)
op2 = op2 - op1
op1 = ops1[i1++]
}
} else if (isInsert(op1) && isRemove(op2)) {
if (op1.length > -op2) {
op1 = op1.slice(-op2)
op2 = ops2[i2++]
} else if (op1.length === -op2) {
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
op2 = op2 + op1.length
op1 = ops1[i1++]
}
} else if (isInsert(op1) && isRetain(op2)) {
if (op1.length > op2) {
operation.insert(op1.slice(0, op2))
op1 = op1.slice(op2)
op2 = ops2[i2++]
} else if (op1.length === op2) {
operation.insert(op1)
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
operation.insert(op1)
op2 = op2 - op1.length
op1 = ops1[i1++]
}
} else if (isRetain(op1) && isRemove(op2)) {
if (op1 > -op2) {
operation.remove(op2)
op1 = op1 + op2
op2 = ops2[i2++]
} else if (op1 === -op2) {
operation.remove(op2)
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
operation.remove(op1)
op2 = op2 + op1
op1 = ops1[i1++]
}
} else {
throw new Error(
"This shouldn't happen: op1: " +
JSON.stringify(op1) +
', op2: ' +
JSON.stringify(op2)
)
}
}
return operation
}
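// Example (illustrative, not part of this module): composing two consecutive
// edits into a single equivalent operation.
//
//   const a = new TextOperation().retain(5).insert('!') // 'hello' -> 'hello!'
//   const b = new TextOperation().retain(6).insert('?') // 'hello!' -> 'hello!?'
//   a.compose(b).apply('hello') // => 'hello!?'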
function getSimpleOp(operation) {
const ops = operation.ops
switch (ops.length) {
case 1:
return ops[0]
case 2:
return isRetain(ops[0]) ? ops[1] : isRetain(ops[1]) ? ops[0] : null
case 3:
if (isRetain(ops[0]) && isRetain(ops[2])) {
return ops[1]
}
}
return null
}
function getStartIndex(operation) {
if (isRetain(operation.ops[0])) {
return operation.ops[0]
}
return 0
}
// Transform takes two operations A and B that happened concurrently and
// produces two operations A' and B' (in an array) such that
// `apply(apply(S, A), B') = apply(apply(S, B), A')`. This function is the
// heart of OT.
TextOperation.transform = function (operation1, operation2) {
if (operation1.baseLength !== operation2.baseLength) {
throw new Error('Both operations have to have the same base length')
}
const operation1prime = new TextOperation()
const operation2prime = new TextOperation()
const ops1 = operation1.ops
const ops2 = operation2.ops
let i1 = 0
let i2 = 0
let op1 = ops1[i1++]
let op2 = ops2[i2++]
for (;;) {
// At every iteration of the loop, the imaginary cursor that both
// operation1 and operation2 have that operates on the input string must
// have the same position in the input string.
if (typeof op1 === 'undefined' && typeof op2 === 'undefined') {
// end condition: both ops1 and ops2 have been processed
break
}
// next two cases: one or both ops are insert ops
// => insert the string in the corresponding prime operation, skip it in
// the other one. If both op1 and op2 are insert ops, prefer op1.
if (isInsert(op1)) {
operation1prime.insert(op1)
operation2prime.retain(op1.length)
op1 = ops1[i1++]
continue
}
if (isInsert(op2)) {
operation1prime.retain(op2.length)
operation2prime.insert(op2)
op2 = ops2[i2++]
continue
}
if (typeof op1 === 'undefined') {
throw new Error(
'Cannot compose operations: first operation is too short.'
)
}
if (typeof op2 === 'undefined') {
throw new Error('Cannot compose operations: first operation is too long.')
}
let minl
if (isRetain(op1) && isRetain(op2)) {
// Simple case: retain/retain
if (op1 > op2) {
minl = op2
op1 = op1 - op2
op2 = ops2[i2++]
} else if (op1 === op2) {
minl = op2
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
minl = op1
op2 = op2 - op1
op1 = ops1[i1++]
}
operation1prime.retain(minl)
operation2prime.retain(minl)
} else if (isRemove(op1) && isRemove(op2)) {
// Both operations remove the same string at the same position. We don't
// need to produce any operations, we just skip over the remove ops and
// handle the case that one operation removes more than the other.
if (-op1 > -op2) {
op1 = op1 - op2
op2 = ops2[i2++]
} else if (op1 === op2) {
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
op2 = op2 - op1
op1 = ops1[i1++]
}
// next two cases: remove/retain and retain/remove
} else if (isRemove(op1) && isRetain(op2)) {
if (-op1 > op2) {
minl = op2
op1 = op1 + op2
op2 = ops2[i2++]
} else if (-op1 === op2) {
minl = op2
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
minl = -op1
op2 = op2 + op1
op1 = ops1[i1++]
}
operation1prime.remove(minl)
} else if (isRetain(op1) && isRemove(op2)) {
if (op1 > -op2) {
minl = -op2
op1 = op1 + op2
op2 = ops2[i2++]
} else if (op1 === -op2) {
minl = op1
op1 = ops1[i1++]
op2 = ops2[i2++]
} else {
minl = op1
op2 = op2 + op1
op1 = ops1[i1++]
}
operation2prime.remove(minl)
} else {
throw new Error("The two operations aren't compatible")
}
}
return [operation1prime, operation2prime]
}
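// Example (illustrative, not part of this module): two concurrent insertions
// converge once each side applies the other's transformed operation.
//
//   const a = new TextOperation().retain(5).insert(' there')
//   const b = new TextOperation().retain(5).insert(' world')
//   const [aPrime, bPrime] = TextOperation.transform(a, b)
//   bPrime.apply(a.apply('hello')) // => 'hello there world'
//   aPrime.apply(b.apply('hello')) // => 'hello there world'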
module.exports = TextOperation

View file

@ -0,0 +1,54 @@
'use strict'
const assert = require('check-types').assert
// Dependencies are loaded at the bottom of the file to mitigate circular
// dependency
let RestoreOrigin = null
/**
* @constructor
* @param {string} kind
* @classdesc
* An Origin records where a {@link Change} came from. The Origin class handles
* simple tag origins, like "it came from rich text mode", or "it came from
* uploading files". Its subclasses record more detailed data for Changes such
* as restoring a version.
*/
function Origin(kind) {
assert.string(kind, 'Origin: bad kind')
this.kind = kind
}
/**
* Create an Origin from its raw form.
*
* @param {Object} [raw]
* @return {Origin | null}
*/
Origin.fromRaw = function originFromRaw(raw) {
if (!raw) return null
if (raw.kind === RestoreOrigin.KIND) return RestoreOrigin.fromRaw(raw)
return new Origin(raw.kind)
}
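// Example (illustrative, not part of this module): fromRaw dispatches on the
// kind field. The 'upload' kind string is made up; 'restore' is the kind used
// by RestoreOrigin.
//
//   Origin.fromRaw({ kind: 'upload' }) // => plain Origin
//   Origin.fromRaw({
//     kind: 'restore',
//     version: 5,
//     timestamp: '2023-01-13T12:00:00.000Z',
//   }) // => RestoreOrigin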
/**
* Convert the Origin to raw form for storage or transmission.
*
* @return {Object}
*/
Origin.prototype.toRaw = function originToRaw() {
return { kind: this.kind }
}
/**
* @return {string}
*/
Origin.prototype.getKind = function () {
return this.kind
}
module.exports = Origin
RestoreOrigin = require('./restore_origin')

View file

@ -0,0 +1,64 @@
'use strict'
const assert = require('check-types').assert
const Origin = require('./')
/**
* @classdesc
* When a {@link Change} is generated by restoring a previous version, this
* records the original version. We also store the timestamp of the restored
* version for display; technically, this is redundant, because we could
* recover it using the version ID. However, it would be very expensive to
* recover all referenced versions, and it is also possible that the change
* for the restored version will no longer exist, either because it was merged
* with other changes or was deleted.
*
* @see Origin
*/
class RestoreOrigin extends Origin {
/**
* @constructor
* @param {number} version that was restored
* @param {Date} timestamp from the restored version
*/
constructor(version, timestamp) {
assert.integer(version, 'RestoreOrigin: bad version')
assert.date(timestamp, 'RestoreOrigin: bad timestamp')
super(RestoreOrigin.KIND)
this.version = version
this.timestamp = timestamp
}
static fromRaw(raw) {
return new RestoreOrigin(raw.version, new Date(raw.timestamp))
}
/** @inheritdoc */
toRaw() {
return {
kind: RestoreOrigin.KIND,
version: this.version,
timestamp: this.timestamp.toISOString(),
}
}
/**
* @return {number}
*/
getVersion() {
return this.version
}
/**
* @return {Date}
*/
getTimestamp() {
return this.timestamp
}
}
RestoreOrigin.KIND = 'restore'
module.exports = RestoreOrigin

View file

@ -0,0 +1,237 @@
'use strict'
const _ = require('lodash')
const BPromise = require('bluebird')
const ChangeNote = require('./change_note')
const ChangeRequest = require('./change_request')
const Chunk = require('./chunk')
const Operation = require('./operation')
/**
* @class
* @classdesc
* Operational Transformation client.
*
* See OT.md for explanation.
*/
function OtClient(_projectId, _editor, _blobStore, _socket) {
const STATE_DISCONNECTED = 0
const STATE_LOADING = 1
const STATE_READY = 2
const STATE_WAITING = 3
let _version = null
let _state = STATE_DISCONNECTED
const _buffer = []
let _ackVersion = null
let _outstanding = []
let _pending = []
const _waiting = []
this.connect = function otClientConnect() {
switch (_state) {
case STATE_DISCONNECTED:
_state = STATE_LOADING
_socket.emit('authenticate', {
projectId: _projectId,
token: 'letmein',
})
break
default:
throw new Error('connect in state ' + _state)
}
}
/**
* The latest project version number for which the client can construct the
* project content.
*
* @return {number} non-negative
*/
this.getVersion = function () {
return _version
}
_socket.on('load', function otClientOnLoad(data) {
switch (_state) {
case STATE_LOADING: {
const chunk = Chunk.fromRaw(data)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges(), { strict: true })
_version = chunk.getEndVersion()
// TODO: we can get remote changes here, so it's not correct to wait for
// the editor to load before transitioning to the READY state
_editor.load(snapshot).then(function () {
_state = STATE_READY
})
break
}
default:
throw new Error('loaded in state ' + _state)
}
})
//
// Local Operations
//
function sendOutstandingChange() {
const changeRequest = new ChangeRequest(_version, _outstanding)
_socket.emit('change', changeRequest.toRaw())
_state = STATE_WAITING
}
function sendLocalOperation(operation) {
_outstanding.push(operation)
sendOutstandingChange()
}
function queueLocalOperation(operation) {
_pending.push(operation)
}
this.handleLocalOperation = function otClientHandleLocalOperation(operation) {
switch (_state) {
case STATE_READY:
sendLocalOperation(operation)
break
case STATE_WAITING:
queueLocalOperation(operation)
break
default:
throw new Error('local operation in state ' + _state)
}
}
/**
* A promise that resolves when the project reaches the given version.
*
* @param {number} version non-negative
* @return {Promise}
*/
this.waitForVersion = function otClientWaitForVersion(version) {
if (!_waiting[version]) _waiting[version] = []
return new BPromise(function (resolve, reject) {
_waiting[version].push(resolve)
})
}
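// Illustrative sketch (not part of this module) of how a caller might drive
// the client. The editor, blobStore and socket objects are assumed to exist,
// and the client must have reached the READY state via the 'load' event before
// local operations are sent.
//
//   const client = new OtClient(projectId, editor, blobStore, socket)
//   client.connect()
//   // ...once loaded...
//   client.handleLocalOperation(operation)
//   client.waitForVersion(client.getVersion() + 1).then(() => {
//     // the change has been acknowledged or received
//   })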
function resolveWaitingPromises() {
for (const version in _waiting) {
if (!Object.prototype.hasOwnProperty.call(_waiting, version)) continue
if (version > _version) continue
_waiting[version].forEach(function (resolve) {
resolve()
})
delete _waiting[version]
}
}
//
// Messages from Server
//
function advanceIfReady() {
if (_ackVersion !== null && _version === _ackVersion) {
_version += 1
_ackVersion = null
handleAckReady()
advanceIfReady()
return
}
const changeNotes = _.remove(_buffer, function (changeNote) {
return changeNote.getBaseVersion() === _version
})
if (changeNotes.length === 1) {
handleRemoteChangeReady(changeNotes[0].getChange())
_version += 1
advanceIfReady()
return
}
if (changeNotes.length !== 0) {
throw new Error('multiple remote changes in client version ' + _version)
}
}
function bufferRemoteChangeNote(changeNote) {
const version = changeNote.getBaseVersion()
if (_.find(_buffer, note => note.getBaseVersion() === version)) {
throw new Error('multiple changes in version ' + version)
}
if (version === _ackVersion) {
throw new Error('received change that was acked in ' + _ackVersion)
}
_buffer.push(changeNote)
}
function handleAckReady() {
// console.log('handleAckReady')
if (_outstanding.length === 0) {
throw new Error('ack complete without outstanding change')
}
if (_state !== STATE_WAITING) {
throw new Error('ack complete in state ' + _state)
}
_editor.handleChangeAcknowledged()
resolveWaitingPromises()
if (_pending.length > 0) {
_outstanding = _pending
_pending = []
sendOutstandingChange()
} else {
_outstanding = []
_state = STATE_READY
}
}
function handleRemoteChangeReady(change) {
if (_pending.length > 0) {
if (_outstanding.length === 0) {
throw new Error('pending change without outstanding change')
}
}
Operation.transformMultiple(_outstanding, change.getOperations())
Operation.transformMultiple(_pending, change.getOperations())
_editor.applyRemoteChange(change)
}
_socket.on('ack', function otClientOnAck(data) {
switch (_state) {
case STATE_WAITING: {
const changeNote = ChangeNote.fromRaw(data)
_ackVersion = changeNote.getBaseVersion()
advanceIfReady()
break
}
default:
throw new Error('ack in state ' + _state)
}
})
_socket.on('change', function otClientOnChange(data) {
switch (_state) {
case STATE_READY:
case STATE_WAITING:
bufferRemoteChangeNote(ChangeNote.fromRaw(data))
advanceIfReady()
break
default:
throw new Error('remote change in state ' + _state)
}
})
//
// Connection State
// TODO: socket.io error handling
//
_socket.on('disconnect', function () {
_state = STATE_DISCONNECTED
// eslint-disable-next-line no-console
console.log('disconnected') // TODO: how do we handle disconnect?
})
}
module.exports = OtClient

View file

@ -0,0 +1,91 @@
/** @module */
'use strict'
const path = require('path')
/**
* Regular expressions for Overleaf v2 taken from
* https://github.com/sharelatex/web-sharelatex/blob/master/app/coffee/Features/Project/SafePath.coffee
*/
//
// Regex of characters that are invalid in filenames
//
// eslint-disable-next-line no-control-regex
const BAD_CHAR_RX = /[/*\u0000-\u001F\u007F\u0080-\u009F\uD800-\uDFFF]/g
//
// Regex of filename patterns that are invalid ("." ".." and leading/trailing
// whitespace)
//
const BAD_FILE_RX = /(^\.$)|(^\.\.$)|(^\s+)|(\s+$)/g
//
// Put a block on filenames which match javascript property names, as they
// can cause exceptions where the code puts filenames into a hash. This is a
// temporary workaround until the code in other places is made safe against
// property names.
//
// See https://github.com/overleaf/write_latex/wiki/Using-javascript-Objects-as-Maps
//
const BLOCKED_FILE_RX =
/^(prototype|constructor|toString|toLocaleString|valueOf|hasOwnProperty|isPrototypeOf|propertyIsEnumerable|__defineGetter__|__lookupGetter__|__defineSetter__|__lookupSetter__|__proto__)$/
//
// Maximum path length, in characters. This is fairly arbitrary.
//
const MAX_PATH = 1024
/**
* Replace invalid characters and filename patterns in a filename with
* underscores.
*/
function cleanPart(filename) {
filename = filename.replace(BAD_CHAR_RX, '_')
filename = filename.replace(BAD_FILE_RX, function (match) {
return new Array(match.length + 1).join('_')
})
return filename
}
/**
* All pathnames in a Snapshot must be clean. We want pathnames that:
*
* 1. are unambiguous (e.g. no `.`s or redundant path separators)
* 2. do not allow directory traversal attacks (e.g. no `..`s or absolute paths)
* 3. do not contain leading/trailing space
* 4. do not contain the character '*' in filenames
*
* We normalise the pathname, split it by the separator and then clean each part
* as a filename
*
* @param {string} pathname
* @return {String}
*/
exports.clean = function (pathname) {
pathname = path.normalize(pathname)
pathname = pathname.replace(/\\/g, '/') // workaround for IE
pathname = pathname.replace(/\/+/g, '/') // no multiple slashes
pathname = pathname.replace(/^(\/.*)$/, '_$1') // no leading /
pathname = pathname.replace(/^(.+)\/$/, '$1') // no trailing /
pathname = pathname.replace(/^ *(.*)$/, '$1') // no leading spaces
pathname = pathname.replace(/^(.*[^ ]) *$/, '$1') // no trailing spaces
if (pathname.length === 0) pathname = '_'
pathname = pathname.split('/').map(cleanPart).join('/')
pathname = pathname.replace(BLOCKED_FILE_RX, '@$1')
return pathname
}
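// Examples (illustrative, not part of this module) of how clean, exposed as
// safePathname.clean from the package index, rewrites suspect pathnames:
//
//   safePathname.clean('/etc/passwd') // => '_/etc/passwd' (no absolute paths)
//   safePathname.clean('../main.tex') // => '__/main.tex' (no traversal)
//   safePathname.clean('prototype') // => '@prototype' (blocked property name)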
/**
* A pathname is clean (see clean) and not too long.
*
* @param {string} pathname
* @return {Boolean}
*/
exports.isClean = function pathnameIsClean(pathname) {
return (
exports.clean(pathname) === pathname &&
pathname.length <= MAX_PATH &&
pathname.length > 0
)
}

View file

@ -0,0 +1,240 @@
'use strict'
const assert = require('check-types').assert
const BPromise = require('bluebird')
const OError = require('@overleaf/o-error')
const FileMap = require('./file_map')
const V2DocVersions = require('./v2_doc_versions')
/**
* @typedef {import("./types").BlobStore} BlobStore
* @typedef {import("./change")} Change
* @typedef {import("./operation/text_operation")} TextOperation
*/
/**
* @classdesc A Snapshot represents the state of a {@link Project} at a
* particular version.
*/
class Snapshot {
static fromRaw(raw) {
assert.object(raw.files, 'bad raw.files')
return new Snapshot(
FileMap.fromRaw(raw.files),
raw.projectVersion,
V2DocVersions.fromRaw(raw.v2DocVersions)
)
}
toRaw() {
const raw = {
files: this.fileMap.toRaw(),
}
if (this.projectVersion) raw.projectVersion = this.projectVersion
if (this.v2DocVersions) raw.v2DocVersions = this.v2DocVersions.toRaw()
return raw
}
/**
* @constructor
* @param {FileMap} [fileMap]
* @param {string} [projectVersion]
* @param {V2DocVersions} [v2DocVersions]
*/
constructor(fileMap, projectVersion, v2DocVersions) {
assert.maybe.instance(fileMap, FileMap, 'bad fileMap')
this.fileMap = fileMap || new FileMap({})
this.projectVersion = projectVersion
this.v2DocVersions = v2DocVersions
}
/**
* @return {string | null | undefined}
*/
getProjectVersion() {
return this.projectVersion
}
setProjectVersion(projectVersion) {
assert.maybe.match(
projectVersion,
Snapshot.PROJECT_VERSION_RX,
'Snapshot: bad projectVersion'
)
this.projectVersion = projectVersion
}
/**
* @return {V2DocVersions | null | undefined}
*/
getV2DocVersions() {
return this.v2DocVersions
}
setV2DocVersions(v2DocVersions) {
assert.maybe.instance(
v2DocVersions,
V2DocVersions,
'Snapshot: bad v2DocVersions'
)
this.v2DocVersions = v2DocVersions
}
updateV2DocVersions(v2DocVersions) {
// merge new v2DocVersions into this.v2DocVersions
v2DocVersions.applyTo(this)
}
/**
* The underlying file map.
* @return {FileMap}
*/
getFileMap() {
return this.fileMap
}
/**
* The pathnames of all of the files.
*
* @return {Array.<string>} in no particular order
*/
getFilePathnames() {
return this.fileMap.getPathnames()
}
/**
* Get a File by its pathname.
* @see FileMap#getFile
*/
getFile(pathname) {
return this.fileMap.getFile(pathname)
}
/**
* Add the given file to the snapshot.
* @see FileMap#addFile
*/
addFile(pathname, file) {
this.fileMap.addFile(pathname, file)
}
/**
* Move or remove a file.
* @see FileMap#moveFile
*/
moveFile(pathname, newPathname) {
this.fileMap.moveFile(pathname, newPathname)
}
/**
* The number of files in the snapshot.
*
* @return {number}
*/
countFiles() {
return this.fileMap.countFiles()
}
/**
* Edit the content of an editable file.
*
* Throws an error if no file with the given name exists.
*
* @param {string} pathname
* @param {TextOperation} textOperation
*/
editFile(pathname, textOperation) {
const file = this.fileMap.getFile(pathname)
if (!file) {
throw new Snapshot.EditMissingFileError(
`can't find file for editing: ${pathname}`
)
}
file.edit(textOperation)
}
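// Example (illustrative, not part of this module): adding a file and editing
// it in place. File and TextOperation are the classes exported by this
// library; the content is made up.
//
//   const snapshot = new Snapshot()
//   snapshot.addFile('main.tex', File.fromString('hello'))
//   snapshot.editFile('main.tex', TextOperation.fromJSON([5, ' world']))
//   snapshot.getFile('main.tex').getContent() // => 'hello world'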
/**
* Apply all changes in sequence. Modifies the snapshot in place.
*
* Ignore recoverable errors (caused by historical bad data) unless opts.strict is true
*
* @param {Change[]} changes
* @param {object} opts
* @param {boolean} opts.strict - do not ignore recoverable errors
*/
applyAll(changes, opts) {
for (const change of changes) {
change.applyTo(this, opts)
}
}
/**
* If the Files in this Snapshot reference blob hashes, add them to the given
* set.
*
* @param {Set.<String>} blobHashes
*/
findBlobHashes(blobHashes) {
// eslint-disable-next-line array-callback-return
this.fileMap.map(file => {
const hash = file.getHash()
if (hash) blobHashes.add(hash)
})
}
/**
* Load all of the files in this snapshot.
*
* @param {string} kind see {File#load}
* @param {BlobStore} blobStore
* @return {Promise}
*/
loadFiles(kind, blobStore) {
return BPromise.props(this.fileMap.map(file => file.load(kind, blobStore)))
}
/**
* Store each of the files in this snapshot and return the raw snapshot for
* long term storage.
*
* @param {BlobStore} blobStore
* @param {number} [concurrency]
* @return {Promise.<Object>}
*/
store(blobStore, concurrency) {
assert.maybe.number(concurrency, 'bad concurrency')
const projectVersion = this.projectVersion
const rawV2DocVersions = this.v2DocVersions
? this.v2DocVersions.toRaw()
: undefined
return this.fileMap
.mapAsync(file => file.store(blobStore), concurrency)
.then(rawFiles => {
return {
files: rawFiles,
projectVersion,
v2DocVersions: rawV2DocVersions,
}
})
}
/**
* Create a deep clone of this snapshot.
*
* @return {Snapshot}
*/
clone() {
return Snapshot.fromRaw(this.toRaw())
}
}
class EditMissingFileError extends OError {}
Snapshot.EditMissingFileError = EditMissingFileError
Snapshot.PROJECT_VERSION_RX_STRING = '^[0-9]+\\.[0-9]+$'
Snapshot.PROJECT_VERSION_RX = new RegExp(Snapshot.PROJECT_VERSION_RX_STRING)
module.exports = Snapshot

View file

@ -0,0 +1,13 @@
import Blob from './blob'
import BPromise from 'bluebird'
export type BlobStore = {
getString(hash: string): BPromise<string>
putString(content: string): BPromise<Blob>
}
export type StringFileRawData = {
content: string
}
export type RawV2DocVersions = Record<string, { pathname: string; v: number }>

View file

@ -0,0 +1,13 @@
/*
* Misc functions
*/
'use strict'
/*
* return true/false if the given string contains non-BMP chars
*/
exports.containsNonBmpChars = function utilContainsNonBmpChars(str) {
// check for first (high) surrogate in a non-BMP character
return /[\uD800-\uDBFF]/.test(str)
}
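// Examples (illustrative):
//
//   exports.containsNonBmpChars('smiley 😀') // => true (surrogate pair)
//   exports.containsNonBmpChars('résumé') // => false (all BMP characters)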

View file

@ -0,0 +1,55 @@
'use strict'
const _ = require('lodash')
/**
* @typedef {import("./file")} File
* @typedef {import("./types").RawV2DocVersions} RawV2DocVersions
*/
/**
* @constructor
* @param {RawV2DocVersions} data
* @classdesc
* Version information for a project's v2 docs, keyed by doc ID; each entry
* records the doc's pathname and version number.
*/
function V2DocVersions(data) {
this.data = data || {}
}
V2DocVersions.fromRaw = function v2DocVersionsFromRaw(raw) {
if (!raw) return undefined
return new V2DocVersions(raw)
}
/**
* Convert to raw form for storage or transmission.
*
* @return {RawV2DocVersions | null}
*/
V2DocVersions.prototype.toRaw = function () {
if (!this.data) return null
const raw = _.clone(this.data)
return raw
}
/**
* Clone this object.
*
* @return {V2DocVersions} a new object of the same type
*/
V2DocVersions.prototype.clone = function v2DocVersionsClone() {
return V2DocVersions.fromRaw(this.toRaw())
}
V2DocVersions.prototype.applyTo = function v2DocVersionsApplyTo(snapshot) {
// Only update the snapshot versions if we have new versions
if (!_.size(this.data)) return
// Create v2DocVersions in snapshot if it does not exist
// otherwise update snapshot v2docversions
if (!snapshot.v2DocVersions) {
snapshot.v2DocVersions = this.clone()
} else {
_.assign(snapshot.v2DocVersions.data, this.data)
}
}
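// Example (illustrative, not part of this module): merging doc versions into a
// snapshot. The doc ID and pathname are made up.
//
//   const versions = new V2DocVersions({
//     '5f0c9d2a': { pathname: 'main.tex', v: 7 },
//   })
//   versions.applyTo(snapshot) // merges into snapshot.v2DocVersions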
module.exports = V2DocVersions

View file

@ -0,0 +1,31 @@
{
"name": "overleaf-editor-core",
"version": "1.0.0",
"description": "Library shared between the editor server and clients.",
"main": "index.js",
"scripts": {
"test": "mocha",
"format": "prettier --list-different $PWD/'**/*.js'",
"format:fix": "prettier --write $PWD/'**/*.js'",
"lint": "eslint --max-warnings 0 --format unix lib test && tsc",
"lint:fix": "eslint --fix lib test",
"test:ci": "npm run test",
"coverage": "istanbul cover _mocha"
},
"author": "team@overleaf.com",
"license": "Proprietary",
"private": true,
"devDependencies": {
"@types/bluebird": "^3.5.30",
"chai": "^3.3.0",
"istanbul": "^0.4.5",
"mocha": "^6.1.4",
"typescript": "^4.5.5"
},
"dependencies": {
"@overleaf/o-error": "*",
"bluebird": "^3.1.1",
"check-types": "^5.1.0",
"lodash": "^4.17.19"
}
}

View file

@ -0,0 +1,37 @@
'use strict'
const { expect } = require('chai')
const core = require('..')
const Change = core.Change
const File = core.File
const Operation = core.Operation
describe('Change', function () {
describe('findBlobHashes', function () {
it('finds blob hashes from operations', function () {
const blobHashes = new Set()
const change = Change.fromRaw({
operations: [],
timestamp: '2015-03-05T12:03:53.035Z',
authors: [null],
})
change.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(0)
// AddFile with content doesn't have a hash.
change.pushOperation(Operation.addFile('a.txt', File.fromString('a')))
change.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(0)
// AddFile with hash should give us a hash.
change.pushOperation(
Operation.addFile('b.txt', File.fromHash(File.EMPTY_FILE_HASH))
)
change.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(1)
expect(blobHashes.has(File.EMPTY_FILE_HASH)).to.be.true
})
})
})

View file

@ -0,0 +1,80 @@
'use strict'
const { expect } = require('chai')
const ot = require('..')
const File = ot.File
const Operation = ot.Operation
const TextOperation = ot.TextOperation
describe('EditFileOperation', function () {
function edit(pathname, textOperationJsonObject) {
return Operation.editFile(
pathname,
TextOperation.fromJSON(textOperationJsonObject)
)
}
describe('canBeComposedWith', function () {
it('on the same file', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('foo.tex', [1, 'y'])
expect(editFileOperation1.canBeComposedWith(editFileOperation2)).to.be
.true
})
it('on different files', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('bar.tex', ['y'])
expect(editFileOperation1.canBeComposedWith(editFileOperation2)).to.be
.false
})
it('with a different type of operation', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = Operation.addFile(
'bar.tex',
File.fromString('')
)
expect(editFileOperation1.canBeComposedWith(editFileOperation2)).to.be
.false
})
it('with incompatible lengths', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('foo.tex', [2, 'y'])
expect(editFileOperation1.canBeComposedWith(editFileOperation2)).to.be
.false
})
})
describe('canBeComposedWithForUndo', function () {
it('can', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('foo.tex', [1, 'y'])
expect(editFileOperation1.canBeComposedWithForUndo(editFileOperation2)).to
.be.true
})
it('cannot', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('foo.tex', ['y', 1, 'z'])
expect(editFileOperation1.canBeComposedWithForUndo(editFileOperation2)).to
.be.false
})
})
describe('compose', function () {
it('composes text operations', function () {
const editFileOperation1 = edit('foo.tex', ['x'])
const editFileOperation2 = edit('foo.tex', [1, 'y'])
const composedFileOperation =
editFileOperation1.compose(editFileOperation2)
const expectedComposedFileOperation = edit('foo.tex', ['xy'])
expect(composedFileOperation).to.deep.equal(expectedComposedFileOperation)
// check that the original operation wasn't modified
expect(editFileOperation1).to.deep.equal(edit('foo.tex', ['x']))
})
})
})

View file

@ -0,0 +1,94 @@
'use strict'
const { expect } = require('chai')
const FakeBlobStore = require('./support/fake_blob_store')
const ot = require('..')
const File = ot.File
describe('File', function () {
it('can have attached metadata', function () {
// no metadata
let file = File.fromString('foo')
expect(file.getMetadata()).to.eql({})
// metadata passed in at construction time
file = File.fromString('foo', { main: true })
expect(file.getMetadata()).to.eql({ main: true })
// metadata set at runtime
file.setMetadata({ main: false })
expect(file.getMetadata()).to.eql({ main: false })
})
describe('toRaw', function () {
it('returns non-empty metadata', function () {
const metadata = { main: true }
const file = File.fromHash(File.EMPTY_FILE_HASH, metadata)
expect(file.toRaw()).to.eql({
hash: File.EMPTY_FILE_HASH,
metadata: metadata,
})
delete file.getMetadata().main
expect(file.toRaw()).to.eql({ hash: File.EMPTY_FILE_HASH })
})
it('returns a deep clone of metadata', function () {
const metadata = { externalFile: { id: 123 } }
const file = File.fromHash(File.EMPTY_FILE_HASH, metadata)
const raw = file.toRaw()
const fileMetadata = file.getMetadata()
const rawMetadata = raw.metadata
expect(rawMetadata).not.to.equal(fileMetadata)
expect(rawMetadata).to.deep.equal(fileMetadata)
})
})
describe('store', function () {
it('does not return empty metadata', function () {
const file = File.fromHash(File.EMPTY_FILE_HASH)
const fakeBlobStore = new FakeBlobStore()
return file.store(fakeBlobStore).then(raw => {
expect(raw).to.eql({ hash: File.EMPTY_FILE_HASH })
})
})
it('returns non-empty metadata', function () {
const metadata = { main: true }
const file = File.fromHash(File.EMPTY_FILE_HASH, metadata)
const fakeBlobStore = new FakeBlobStore()
return file.store(fakeBlobStore).then(raw => {
expect(raw).to.eql({
hash: File.EMPTY_FILE_HASH,
metadata: metadata,
})
})
})
it('returns a deep clone of metadata', function () {
const metadata = { externalFile: { id: 123 } }
const file = File.fromHash(File.EMPTY_FILE_HASH, metadata)
const fakeBlobStore = new FakeBlobStore()
return file.store(fakeBlobStore).then(raw => {
raw.metadata.externalFile.id = 456
expect(file.getMetadata().externalFile.id).to.equal(123)
})
})
})
describe('with string data', function () {
it('can be created from a string', function () {
const file = File.fromString('foo')
expect(file.getContent()).to.equal('foo')
})
})
describe('with hollow string data', function () {
it('can be cloned', function () {
const file = File.createHollow(null, 0)
expect(file.getStringLength()).to.equal(0)
const clone = file.clone()
expect(clone.getStringLength()).to.equal(0)
})
})
})

View file

@ -0,0 +1,202 @@
'use strict'
const { expect } = require('chai')
const BPromise = require('bluebird')
const _ = require('lodash')
const ot = require('..')
const File = ot.File
const FileMap = ot.FileMap
describe('FileMap', function () {
function makeTestFile(pathname) {
return File.fromString(pathname)
}
function makeTestFiles(pathnames) {
return _.zipObject(pathnames, _.map(pathnames, makeTestFile))
}
function makeFileMap(pathnames) {
return new FileMap(makeTestFiles(pathnames))
}
it('allows construction with a single file', function () {
makeFileMap(['a'])
})
it('allows folders to differ by case', function () {
expect(() => {
makeFileMap(['a/b', 'A/c'])
}).not.to.throw()
expect(() => {
makeFileMap(['a/b/c', 'A/b/d'])
}).not.to.throw()
expect(() => {
makeFileMap(['a/b/c', 'a/B/d'])
}).not.to.throw()
})
it('does not allow conflicting paths on construct', function () {
expect(() => {
makeFileMap(['a', 'a/b'])
}).to.throw(FileMap.PathnameConflictError)
})
it('detects conflicting paths with characters that sort before /', function () {
const fileMap = makeFileMap(['a', 'a!'])
expect(fileMap.wouldConflict('a/b')).to.be.ok
})
it('detects conflicting paths', function () {
const fileMap = makeFileMap(['a/b/c'])
expect(fileMap.wouldConflict('a/b/c/d')).to.be.ok
expect(fileMap.wouldConflict('a')).to.be.ok
expect(fileMap.wouldConflict('b')).not.to.be.ok
expect(fileMap.wouldConflict('a/b')).to.be.ok
expect(fileMap.wouldConflict('a/c')).not.to.be.ok
expect(fileMap.wouldConflict('a/b/c')).not.to.be.ok
expect(fileMap.wouldConflict('a/b/d')).not.to.be.ok
expect(fileMap.wouldConflict('d/b/c')).not.to.be.ok
})
it('allows paths that differ by case', function () {
const fileMap = makeFileMap(['a/b/c'])
expect(fileMap.wouldConflict('a/b/C')).not.to.be.ok
expect(fileMap.wouldConflict('A')).not.to.be.ok
expect(fileMap.wouldConflict('A/b')).not.to.be.ok
expect(fileMap.wouldConflict('a/B')).not.to.be.ok
expect(fileMap.wouldConflict('A/B')).not.to.be.ok
})
it('does not add a file with a conflicting path', function () {
const fileMap = makeFileMap(['a/b'])
const file = makeTestFile('a/b/c')
expect(() => {
fileMap.addFile('a/b/c', file)
}).to.throw(FileMap.PathnameConflictError)
})
it('does not move a file to a conflicting path', function () {
const fileMap = makeFileMap(['a/b', 'a/c'])
expect(() => {
fileMap.moveFile('a/b', 'a')
}).to.throw(FileMap.PathnameConflictError)
})
it('errors when trying to move a non-existent file', function () {
const fileMap = makeFileMap(['a'])
expect(() => fileMap.moveFile('b', 'a')).to.throw(FileMap.FileNotFoundError)
})
it('moves a file over an empty folder', function () {
const fileMap = makeFileMap(['a/b'])
fileMap.moveFile('a/b', 'a')
expect(fileMap.countFiles()).to.equal(1)
expect(fileMap.getFile('a')).to.exist
expect(fileMap.getFile('a').getContent()).to.equal('a/b')
})
it('does not move a file over a non-empty folder', function () {
const fileMap = makeFileMap(['a/b', 'a/c'])
expect(() => {
fileMap.moveFile('a/b', 'a')
}).to.throw(FileMap.PathnameConflictError)
})
it('does not overwrite filename that differs by case on add', function () {
const fileMap = makeFileMap(['a'])
fileMap.addFile('A', makeTestFile('A'))
expect(fileMap.countFiles()).to.equal(2)
expect(fileMap.files.a).to.exist
expect(fileMap.files.A).to.exist
expect(fileMap.getFile('a')).to.exist
expect(fileMap.getFile('A').getContent()).to.equal('A')
})
it('changes case on move', function () {
const fileMap = makeFileMap(['a'])
fileMap.moveFile('a', 'A')
expect(fileMap.countFiles()).to.equal(1)
expect(fileMap.files.a).not.to.exist
expect(fileMap.files.A).to.exist
expect(fileMap.getFile('A').getContent()).to.equal('a')
})
it('does not overwrite filename that differs by case on move', function () {
const fileMap = makeFileMap(['a', 'b'])
fileMap.moveFile('a', 'B')
expect(fileMap.countFiles()).to.equal(2)
expect(fileMap.files.a).not.to.exist
expect(fileMap.files.b).to.exist
expect(fileMap.files.B).to.exist
expect(fileMap.getFile('B').getContent()).to.equal('a')
})
it('does not find pathname that differs by case', function () {
const fileMap = makeFileMap(['a'])
expect(fileMap.getFile('a')).to.exist
expect(fileMap.getFile('A')).not.to.exist
expect(fileMap.getFile('b')).not.to.exist
})
it('does not allow non-safe pathnames', function () {
expect(() => {
makeFileMap(['c*'])
}).to.throw(FileMap.BadPathnameError)
const fileMap = makeFileMap([])
expect(() => {
fileMap.addFile('c*', makeTestFile('c:'))
}).to.throw(FileMap.BadPathnameError)
fileMap.addFile('a', makeTestFile('a'))
expect(() => {
fileMap.moveFile('a', 'c*')
}).to.throw(FileMap.BadPathnameError)
expect(() => {
fileMap.addFile('hasOwnProperty', makeTestFile('hasOwnProperty'))
fileMap.addFile('anotherFile', makeTestFile('anotherFile'))
}).to.throw()
})
it('removes a file', function () {
const fileMap = makeFileMap(['a', 'b'])
fileMap.removeFile('a')
expect(fileMap.countFiles()).to.equal(1)
expect(fileMap.files.a).not.to.exist
expect(fileMap.files.b).to.exist
})
it('errors when trying to remove a non-existent file', function () {
const fileMap = makeFileMap(['a'])
expect(() => fileMap.removeFile('b')).to.throw(FileMap.FileNotFoundError)
})
it('has mapAsync', function () {
const concurrency = 1
return BPromise.map(
[
[[], {}],
[['a'], { a: 'a-a' }], // the test is to map to "content-pathname"
[['a', 'b'], { a: 'a-a', b: 'b-b' }],
],
test => {
const input = test[0]
const expectedOutput = test[1]
const fileMap = makeFileMap(input)
return fileMap
.mapAsync((file, pathname) => {
return file.getContent() + '-' + pathname
}, concurrency)
.then(result => {
expect(result).to.deep.equal(expectedOutput)
})
}
)
})
})

View file

@ -0,0 +1,42 @@
'use strict'
const { expect } = require('chai')
const core = require('..')
const Change = core.Change
const File = core.File
const History = core.History
const Operation = core.Operation
const Snapshot = core.Snapshot
describe('History', function () {
describe('findBlobHashes', function () {
it('finds blob hashes from snapshot and changes', function () {
const history = new History(new Snapshot(), [])
const blobHashes = new Set()
history.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(0)
// Add a file with a hash to the snapshot.
history.getSnapshot().addFile('foo', File.fromHash(File.EMPTY_FILE_HASH))
history.findBlobHashes(blobHashes)
expect(Array.from(blobHashes)).to.have.members([File.EMPTY_FILE_HASH])
// Add a file with a hash to the changes.
const testHash = 'a'.repeat(40)
const change = Change.fromRaw({
operations: [],
timestamp: '2015-03-05T12:03:53.035Z',
authors: [null],
})
change.pushOperation(Operation.addFile('bar', File.fromHash(testHash)))
history.pushChanges([change])
history.findBlobHashes(blobHashes)
expect(Array.from(blobHashes)).to.have.members([
File.EMPTY_FILE_HASH,
testHash,
])
})
})
})

View file

@ -0,0 +1,22 @@
'use strict'
const { expect } = require('chai')
const ot = require('..')
const HollowStringFileData = require('../lib/file_data/hollow_string_file_data')
const TextOperation = ot.TextOperation
describe('HollowStringFileData', function () {
it('validates string length when edited', function () {
const maxLength = TextOperation.MAX_STRING_LENGTH
const fileData = new HollowStringFileData(maxLength)
expect(fileData.getStringLength()).to.equal(maxLength)
expect(() => {
fileData.edit(new TextOperation().retain(maxLength).insert('x'))
}).to.throw(TextOperation.TooLongError)
expect(fileData.getStringLength()).to.equal(maxLength)
fileData.edit(new TextOperation().retain(maxLength - 1).remove(1))
expect(fileData.getStringLength()).to.equal(maxLength - 1)
})
})

View file

@ -0,0 +1,17 @@
'use strict'
const { expect } = require('chai')
const ot = require('..')
const Label = ot.Label
describe('Label', function () {
it('can be created by an anonymous author', function () {
const label = Label.fromRaw({
text: 'test',
authorId: null,
timestamp: '2016-01-01T00:00:00Z',
version: 123,
})
expect(label.getAuthorId()).to.be.null
})
})

View file

@ -0,0 +1,98 @@
'use strict'
const _ = require('lodash')
const { expect } = require('chai')
const ot = require('..')
const File = ot.File
const TextOperation = ot.TextOperation
const LazyStringFileData = require('../lib/file_data/lazy_string_file_data')
describe('LazyStringFileData', function () {
it('uses raw text operations for toRaw and fromRaw', function () {
const testHash = File.EMPTY_FILE_HASH
const fileData = new LazyStringFileData(testHash, 0)
let roundTripFileData
expect(fileData.toRaw()).to.eql({
hash: testHash,
stringLength: 0,
})
roundTripFileData = LazyStringFileData.fromRaw(fileData.toRaw())
expect(roundTripFileData.getHash()).to.equal(testHash)
expect(roundTripFileData.getStringLength()).to.equal(0)
expect(roundTripFileData.getTextOperations()).to.have.length(0)
fileData.edit(new TextOperation().insert('a'))
expect(fileData.toRaw()).to.eql({
hash: testHash,
stringLength: 1,
textOperations: [['a']],
})
roundTripFileData = LazyStringFileData.fromRaw(fileData.toRaw())
expect(roundTripFileData.getHash()).not.to.exist // file has changed
expect(roundTripFileData.getStringLength()).to.equal(1)
expect(roundTripFileData.getTextOperations()).to.have.length(1)
expect(roundTripFileData.getTextOperations()[0].ops).to.have.length(1)
fileData.edit(new TextOperation().retain(1).insert('b'))
expect(fileData.toRaw()).to.eql({
hash: testHash,
stringLength: 2,
textOperations: [['a'], [1, 'b']],
})
roundTripFileData = LazyStringFileData.fromRaw(fileData.toRaw())
expect(roundTripFileData.getHash()).not.to.exist // file has changed
expect(roundTripFileData.getStringLength()).to.equal(2)
expect(roundTripFileData.getTextOperations()).to.have.length(2)
expect(roundTripFileData.getTextOperations()[0].ops).to.have.length(1)
expect(roundTripFileData.getTextOperations()[1].ops).to.have.length(2)
})
it('validates operations when edited', function () {
const testHash = File.EMPTY_FILE_HASH
const fileData = new LazyStringFileData(testHash, 0)
expect(fileData.getHash()).equal(testHash)
expect(fileData.getByteLength()).to.equal(0) // approximately
expect(fileData.getStringLength()).to.equal(0)
expect(fileData.getTextOperations()).to.have.length(0)
fileData.edit(new TextOperation().insert('a'))
expect(fileData.getHash()).not.to.exist
expect(fileData.getByteLength()).to.equal(1) // approximately
expect(fileData.getStringLength()).to.equal(1)
expect(fileData.getTextOperations()).to.have.length(1)
expect(() => {
fileData.edit(new TextOperation().retain(10))
}).to.throw(TextOperation.ApplyError)
expect(fileData.getHash()).not.to.exist
expect(fileData.getByteLength()).to.equal(1) // approximately
expect(fileData.getStringLength()).to.equal(1)
expect(fileData.getTextOperations()).to.have.length(1)
})
it('validates string length when edited', function () {
const testHash = File.EMPTY_FILE_HASH
const fileData = new LazyStringFileData(testHash, 0)
expect(fileData.getHash()).equal(testHash)
expect(fileData.getByteLength()).to.equal(0) // approximately
expect(fileData.getStringLength()).to.equal(0)
expect(fileData.getTextOperations()).to.have.length(0)
const longString = _.repeat('a', TextOperation.MAX_STRING_LENGTH)
fileData.edit(new TextOperation().insert(longString))
expect(fileData.getHash()).not.to.exist
expect(fileData.getByteLength()).to.equal(longString.length) // approximate
expect(fileData.getStringLength()).to.equal(longString.length)
expect(fileData.getTextOperations()).to.have.length(1)
expect(() => {
fileData.edit(new TextOperation().retain(longString.length).insert('x'))
}).to.throw(TextOperation.TooLongError)
expect(fileData.getHash()).not.to.exist
expect(fileData.getByteLength()).to.equal(longString.length) // approximate
expect(fileData.getStringLength()).to.equal(longString.length)
expect(fileData.getTextOperations()).to.have.length(1)
})
})

View file

@ -0,0 +1,42 @@
'use strict'
const { expect } = require('chai')
const ot = require('..')
const File = ot.File
const MoveFileOperation = ot.MoveFileOperation
const Snapshot = ot.Snapshot
describe('MoveFileOperation', function () {
function makeEmptySnapshot() {
return new Snapshot()
}
function makeOneFileSnapshot() {
const snapshot = makeEmptySnapshot()
snapshot.addFile('foo', File.fromString('test: foo'))
return snapshot
}
function makeTwoFileSnapshot() {
const snapshot = makeOneFileSnapshot()
snapshot.addFile('bar', File.fromString('test: bar'))
return snapshot
}
it('moves a file over another', function () {
const snapshot = makeOneFileSnapshot()
const operation = new MoveFileOperation('foo', 'bar')
operation.applyTo(snapshot)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile('bar').getContent()).to.equal('test: foo')
})
it('moves a file to another pathname', function () {
const snapshot = makeTwoFileSnapshot()
const operation = new MoveFileOperation('foo', 'a')
operation.applyTo(snapshot)
expect(snapshot.countFiles()).to.equal(2)
expect(snapshot.getFile('a').getContent()).to.equal('test: foo')
expect(snapshot.getFile('bar').getContent()).to.equal('test: bar')
})
})

View file

@ -0,0 +1,746 @@
'use strict'
const _ = require('lodash')
const { expect } = require('chai')
const ot = require('..')
const File = ot.File
const AddFileOperation = ot.AddFileOperation
const MoveFileOperation = ot.MoveFileOperation
const EditFileOperation = ot.EditFileOperation
const NoOperation = ot.NoOperation
const Operation = ot.Operation
const TextOperation = ot.TextOperation
const Snapshot = ot.Snapshot
describe('Operation', function () {
function makeEmptySnapshot() {
return new Snapshot()
}
function makeOneFileSnapshot() {
const snapshot = makeEmptySnapshot()
snapshot.addFile('foo', File.fromString(''))
return snapshot
}
function makeTwoFileSnapshot() {
const snapshot = makeOneFileSnapshot()
snapshot.addFile('bar', File.fromString('a'))
return snapshot
}
function addFile(pathname, content) {
return new AddFileOperation(pathname, File.fromString(content))
}
function roundTripOperation(operation) {
return Operation.fromRaw(operation.toRaw())
}
function deepCopySnapshot(snapshot) {
return Snapshot.fromRaw(snapshot.toRaw())
}
function runConcurrently(operation0, operation1, snapshot) {
const operations = [
// make sure they survive serialization
roundTripOperation(operation0),
roundTripOperation(operation1),
]
const primeOperations = Operation.transform(operation0, operation1)
const originalSnapshot = snapshot || makeEmptySnapshot()
const snapshotA = deepCopySnapshot(originalSnapshot)
const snapshotB = deepCopySnapshot(originalSnapshot)
operations[0].applyTo(snapshotA)
operations[1].applyTo(snapshotB)
primeOperations[0].applyTo(snapshotB)
primeOperations[1].applyTo(snapshotA)
expect(snapshotA).to.eql(snapshotB)
return {
snapshot: snapshotA,
operations: operations,
primeOperations: primeOperations,
log() {
console.log(this)
return this
},
expectNoTransform() {
expect(this.operations).to.eql(this.primeOperations)
return this
},
swap() {
return runConcurrently(operation1, operation0, originalSnapshot)
},
expectFiles(files) {
this.expectedFiles = files
expect(this.snapshot.countFiles()).to.equal(_.size(files))
_.forOwn(files, (expectedFile, pathname) => {
if (_.isString(expectedFile)) {
expectedFile = { content: expectedFile, metadata: {} }
}
const file = this.snapshot.getFile(pathname)
expect(file.getContent()).to.equal(expectedFile.content)
expect(file.getMetadata()).to.eql(expectedFile.metadata)
})
return this
},
expectSymmetry() {
if (!this.expectedFiles) {
throw new Error('must call expectFiles before expectSymmetry')
}
this.swap().expectFiles(this.expectedFiles)
return this
},
expectPrime(index, klass) {
expect(this.primeOperations[index]).to.be.an.instanceof(klass)
return this
},
tap(fn) {
fn.call(this)
return this
},
}
}
// shorthand for creating an edit operation
function edit(pathname, textOperationJsonObject) {
return Operation.editFile(
pathname,
TextOperation.fromJSON(textOperationJsonObject)
)
}
it('transforms AddFile-AddFile with different names', function () {
runConcurrently(addFile('foo', ''), addFile('bar', 'a'))
.expectNoTransform()
.expectFiles({ bar: 'a', foo: '' })
.expectSymmetry()
})
it('transforms AddFile-AddFile with same name', function () {
// the second file 'wins'
runConcurrently(addFile('foo', ''), addFile('foo', 'a'))
.expectFiles({ foo: 'a' })
// if the first add was committed first, the second add overwrites it
.expectPrime(1, AddFileOperation)
// if the second add was committed first, the first add becomes a no-op
.expectPrime(0, NoOperation)
.swap()
.expectFiles({ foo: '' })
})
it('transforms AddFile-MoveFile with no conflict', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
addFile('bar', 'a'),
makeOneFileSnapshot()
)
.expectNoTransform()
.expectFiles({ bar: 'a', baz: '' })
.expectSymmetry()
})
it('transforms AddFile-MoveFile with move from new file', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
addFile('foo', 'a'),
makeOneFileSnapshot()
)
.expectFiles({ baz: 'a' })
// if the move was committed first, the add overwrites it
.expectPrime(1, AddFileOperation)
// if the add was committed first, the move appears in the history
.expectPrime(0, MoveFileOperation)
.expectSymmetry()
})
it('transforms AddFile-MoveFile with move to new file', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
addFile('baz', 'a'),
makeOneFileSnapshot()
)
.expectFiles({ baz: 'a' })
// if the move was committed first, the add overwrites it
.expectPrime(1, AddFileOperation)
// if the add was committed first, the move becomes a delete
.expectPrime(0, MoveFileOperation)
.tap(function () {
expect(this.primeOperations[0].isRemoveFile()).to.be.true
})
.expectSymmetry()
})
it('transforms AddFile-RemoveFile with no conflict', function () {
runConcurrently(
Operation.removeFile('foo'),
addFile('bar', 'a'),
makeOneFileSnapshot()
)
.expectNoTransform()
.expectFiles({ bar: 'a' })
.expectSymmetry()
})
it('transforms AddFile-RemoveFile that removes added file', function () {
runConcurrently(
Operation.removeFile('foo'),
addFile('foo', 'a'),
makeOneFileSnapshot()
)
.expectFiles({ foo: 'a' })
// if the move was committed first, the add overwrites it
.expectPrime(1, AddFileOperation)
// if the add was committed first, the move gets dropped
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms AddFile-EditFile with no conflict', function () {
runConcurrently(
edit('foo', ['x']),
addFile('bar', 'a'),
makeOneFileSnapshot()
)
.expectNoTransform()
.expectFiles({ bar: 'a', foo: 'x' })
.expectSymmetry()
})
it('transforms AddFile-EditFile when new file is edited', function () {
runConcurrently(
edit('foo', ['x']),
addFile('foo', 'a'),
makeOneFileSnapshot()
)
.expectFiles({ foo: 'a' })
// if the edit was committed first, the add overwrites it
.expectPrime(1, AddFileOperation)
// if the add was committed first, the edit gets dropped
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms AddFile-SetFileMetadata with no conflict', function () {
const testMetadata = { baz: 1 }
runConcurrently(
addFile('bar', 'a'),
Operation.setFileMetadata('foo', testMetadata),
makeOneFileSnapshot()
)
.expectNoTransform()
.expectFiles({ foo: { content: '', metadata: testMetadata }, bar: 'a' })
.expectSymmetry()
})
it('transforms AddFile-SetFileMetadata with same name', function () {
const testMetadata = { baz: 1 }
runConcurrently(
addFile('foo', 'x'),
Operation.setFileMetadata('foo', testMetadata),
makeEmptySnapshot()
)
.expectFiles({ foo: { content: 'x', metadata: testMetadata } })
.expectSymmetry()
})
it('transforms MoveFile-MoveFile with no conflict', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
Operation.moveFile('bar', 'bat'),
makeTwoFileSnapshot()
)
.expectFiles({ bat: 'a', baz: '' })
.expectNoTransform()
.expectSymmetry()
})
it('transforms MoveFile-MoveFile same move foo->foo, foo->foo', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.moveFile('foo', 'foo'),
makeOneFileSnapshot()
)
.expectFiles({ foo: '' })
.expectPrime(1, NoOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile no-op foo->foo, foo->bar', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.moveFile('foo', 'bar'),
makeOneFileSnapshot()
)
.expectFiles({ bar: '' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile no-op foo->foo, bar->foo', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.moveFile('foo', 'bar'),
makeTwoFileSnapshot()
)
.expectFiles({ bar: '' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile no-op foo->foo, bar->bar', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.moveFile('bar', 'bar'),
makeTwoFileSnapshot()
)
.expectFiles({ bar: 'a', foo: '' })
.expectPrime(1, NoOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile same move foo->bar, foo->bar', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.moveFile('foo', 'bar'),
makeOneFileSnapshot()
)
.expectFiles({ bar: '' })
.expectPrime(1, NoOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile opposite foo->bar, bar->foo', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.moveFile('bar', 'foo'),
makeTwoFileSnapshot()
)
.expectFiles([])
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.tap(function () {
expect(this.primeOperations[1].isRemoveFile()).to.be.true
expect(this.primeOperations[1].getPathname()).to.equal('bar')
expect(this.primeOperations[0].isRemoveFile()).to.be.true
expect(this.primeOperations[0].getPathname()).to.equal('foo')
})
.expectSymmetry()
})
it('transforms MoveFile-MoveFile no-op foo->foo, bar->baz', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.moveFile('bar', 'baz'),
makeTwoFileSnapshot()
)
.expectFiles({ baz: 'a', foo: '' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile diverge foo->bar, foo->baz', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.moveFile('foo', 'baz'),
makeOneFileSnapshot()
)
.expectFiles({ baz: '' })
// if foo->bar was committed first, the second move becomes bar->baz
.expectPrime(1, MoveFileOperation)
// if foo->baz was committed first, the second move becomes a no-op
.expectPrime(0, NoOperation)
.tap(function () {
expect(this.primeOperations[1].getPathname()).to.equal('bar')
expect(this.primeOperations[1].getNewPathname()).to.equal('baz')
})
.swap()
.expectFiles({ bar: '' })
})
it('transforms MoveFile-MoveFile transitive foo->baz, bar->foo', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
Operation.moveFile('bar', 'foo'),
makeTwoFileSnapshot()
)
.expectFiles({ baz: 'a' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile transitive foo->bar, bar->baz', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.moveFile('bar', 'baz'),
makeTwoFileSnapshot()
)
.expectFiles({ baz: '' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.expectSymmetry()
})
it('transforms MoveFile-MoveFile converge foo->baz, bar->baz', function () {
runConcurrently(
Operation.moveFile('foo', 'baz'),
Operation.moveFile('bar', 'baz'),
makeTwoFileSnapshot()
)
.expectFiles({ baz: 'a' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.tap(function () {
// if foo->baz was committed first, we just apply the move
expect(this.primeOperations[1]).to.eql(this.operations[1])
// if bar->baz was committed first, the other move becomes a remove
expect(this.primeOperations[0].isRemoveFile()).to.be.true
expect(this.primeOperations[0].getPathname()).to.equal('foo')
})
.swap()
.expectFiles({ baz: '' })
})
it('transforms MoveFile-RemoveFile no-op foo->foo, foo->', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.removeFile('foo'),
makeOneFileSnapshot()
)
.expectFiles([])
.expectPrime(1, MoveFileOperation)
.expectPrime(0, NoOperation)
.tap(function () {
expect(this.primeOperations[1].isRemoveFile()).to.be.true
})
.expectSymmetry()
})
it('transforms MoveFile-RemoveFile same move foo->, foo->', function () {
runConcurrently(
Operation.removeFile('foo'),
Operation.removeFile('foo'),
makeOneFileSnapshot()
)
.expectFiles([])
.expectPrime(1, NoOperation)
.expectPrime(0, NoOperation)
.expectSymmetry()
})
it('transforms MoveFile-RemoveFile no conflict foo->, bar->', function () {
runConcurrently(
Operation.removeFile('foo'),
Operation.removeFile('bar'),
makeTwoFileSnapshot()
)
.expectFiles([])
.expectNoTransform()
.expectSymmetry()
})
it('transforms MoveFile-RemoveFile no conflict foo->foo, bar->', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.removeFile('bar'),
makeTwoFileSnapshot()
)
.expectFiles({ foo: '' })
.expectPrime(1, MoveFileOperation)
.expectPrime(0, NoOperation)
.tap(function () {
expect(this.primeOperations[1].isRemoveFile()).to.be.true
})
.expectSymmetry()
})
it('transforms MoveFile-RemoveFile transitive foo->, bar->foo', function () {
runConcurrently(
Operation.removeFile('foo'),
Operation.moveFile('bar', 'foo'),
makeTwoFileSnapshot()
)
.expectFiles([])
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.tap(function () {
expect(this.primeOperations[1].isRemoveFile()).to.be.true
expect(this.primeOperations[1].getPathname()).to.equal('bar')
expect(this.primeOperations[0].isRemoveFile()).to.be.true
expect(this.primeOperations[0].getPathname()).to.equal('foo')
})
.expectSymmetry()
})
it('transforms MoveFile-RemoveFile transitive foo->bar, bar->', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.removeFile('bar'),
makeTwoFileSnapshot()
)
.expectFiles({})
.expectPrime(1, MoveFileOperation)
.expectPrime(0, MoveFileOperation)
.tap(function () {
expect(this.primeOperations[1].isRemoveFile()).to.be.true
expect(this.primeOperations[1].getPathname()).to.equal('bar')
expect(this.primeOperations[0].isRemoveFile()).to.be.true
expect(this.primeOperations[0].getPathname()).to.equal('foo')
})
.expectSymmetry()
})
it('transforms MoveFile-EditFile with no conflict', function () {
runConcurrently(
Operation.moveFile('bar', 'baz'),
edit('foo', ['x']),
makeTwoFileSnapshot()
)
.expectFiles({ baz: 'a', foo: 'x' })
.expectNoTransform()
.expectSymmetry()
})
it('transforms MoveFile-EditFile with edit on pathname', function () {
runConcurrently(
Operation.moveFile('foo', 'bar'),
edit('foo', ['x']),
makeOneFileSnapshot()
)
.expectFiles({ bar: 'x' })
.expectPrime(1, EditFileOperation)
.expectPrime(0, MoveFileOperation)
.tap(function () {
expect(this.primeOperations[1].getPathname()).to.equal('bar')
expect(this.primeOperations[0].getPathname()).to.equal('foo')
expect(this.primeOperations[0].getNewPathname()).to.equal('bar')
})
.expectSymmetry()
})
it('transforms MoveFile-EditFile with edit on new pathname', function () {
runConcurrently(
Operation.moveFile('bar', 'foo'),
edit('foo', ['x']),
makeTwoFileSnapshot()
)
.expectFiles({ foo: 'a' })
.expectPrime(1, NoOperation)
.tap(function () {
expect(this.primeOperations[0]).to.eql(this.operations[0])
})
.expectSymmetry()
})
it('transforms MoveFile-EditFile with no-op move', function () {
runConcurrently(
Operation.moveFile('foo', 'foo'),
edit('foo', ['x']),
makeOneFileSnapshot()
)
.expectFiles({ foo: 'x' })
.expectNoTransform()
.expectSymmetry()
})
it('transforms MoveFile-SetFileMetadata with no conflict', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.moveFile('foo', 'baz'),
Operation.setFileMetadata('bar', testMetadata),
makeTwoFileSnapshot()
)
.expectNoTransform()
.expectFiles({ bar: { content: 'a', metadata: testMetadata }, baz: '' })
.expectSymmetry()
})
it('transforms MoveFile-SetFileMetadata with set on pathname', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.setFileMetadata('foo', testMetadata),
makeOneFileSnapshot()
)
.expectFiles({ bar: { content: '', metadata: testMetadata } })
.expectSymmetry()
})
it('transforms MoveFile-SetFileMetadata w/ set on new pathname', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.moveFile('foo', 'bar'),
Operation.setFileMetadata('bar', testMetadata),
makeTwoFileSnapshot()
)
// move wins
.expectFiles({ bar: { content: '', metadata: {} } })
.expectSymmetry()
})
it('transforms MoveFile-SetFileMetadata with no-op move', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.moveFile('foo', 'foo'),
Operation.setFileMetadata('foo', testMetadata),
makeOneFileSnapshot()
)
.expectFiles({ foo: { content: '', metadata: testMetadata } })
.expectSymmetry()
})
it('transforms EditFile-EditFile with no conflict', function () {
runConcurrently(
edit('foo', ['x']),
edit('bar', [1, 'x']),
makeTwoFileSnapshot()
)
.expectFiles({ bar: 'ax', foo: 'x' })
.expectNoTransform()
.expectSymmetry()
})
it('transforms EditFile-EditFile on same file', function () {
runConcurrently(
edit('foo', ['x']),
edit('foo', ['y']),
makeOneFileSnapshot()
)
.expectFiles({ foo: 'xy' })
.expectPrime(1, EditFileOperation)
.expectPrime(0, EditFileOperation)
.tap(function () {
expect(this.primeOperations[1].getTextOperation().toJSON()).to.eql([
1,
'y',
])
expect(this.primeOperations[0].getTextOperation().toJSON()).to.eql([
'x',
1,
])
})
.swap()
.expectFiles({ foo: 'yx' })
})
it('transforms EditFile-RemoveFile with no conflict', function () {
runConcurrently(
edit('foo', ['x']),
Operation.removeFile('bar'),
makeTwoFileSnapshot()
)
.expectFiles({ foo: 'x' })
.expectNoTransform()
.expectSymmetry()
})
it('transforms EditFile-RemoveFile on same file', function () {
runConcurrently(
edit('foo', ['x']),
Operation.removeFile('foo'),
makeOneFileSnapshot()
)
.expectFiles({})
.expectSymmetry()
})
it('transforms EditFile-SetFileMetadata with no conflict', function () {
const testMetadata = { baz: 1 }
runConcurrently(
edit('foo', ['x']),
Operation.setFileMetadata('bar', testMetadata),
makeTwoFileSnapshot()
)
.expectNoTransform()
.expectFiles({
foo: { content: 'x', metadata: {} },
bar: { content: 'a', metadata: testMetadata },
})
.expectSymmetry()
})
it('transforms EditFile-SetFileMetadata on same file', function () {
const testMetadata = { baz: 1 }
runConcurrently(
edit('foo', ['x']),
Operation.setFileMetadata('foo', testMetadata),
makeOneFileSnapshot()
)
.expectNoTransform()
.expectFiles({ foo: { content: 'x', metadata: testMetadata } })
.expectSymmetry()
})
it('transforms SetFileMetadata-SetFileMetadata w/ no conflict', function () {
runConcurrently(
Operation.setFileMetadata('foo', { baz: 1 }),
Operation.setFileMetadata('bar', { baz: 2 }),
makeTwoFileSnapshot()
)
.expectNoTransform()
.expectFiles({
foo: { content: '', metadata: { baz: 1 } },
bar: { content: 'a', metadata: { baz: 2 } },
})
.expectSymmetry()
})
it('transforms SetFileMetadata-SetFileMetadata on same file', function () {
runConcurrently(
Operation.setFileMetadata('foo', { baz: 1 }),
Operation.setFileMetadata('foo', { baz: 2 }),
makeOneFileSnapshot()
)
// second op wins
.expectFiles({ foo: { content: '', metadata: { baz: 2 } } })
.swap()
// first op wins
.expectFiles({ foo: { content: '', metadata: { baz: 1 } } })
})
it('transforms SetFileMetadata-RemoveFile with no conflict', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.setFileMetadata('foo', testMetadata),
Operation.removeFile('bar'),
makeTwoFileSnapshot()
)
.expectNoTransform()
.expectFiles({ foo: { content: '', metadata: testMetadata } })
.expectSymmetry()
})
it('transforms SetFileMetadata-RemoveFile on same file', function () {
const testMetadata = { baz: 1 }
runConcurrently(
Operation.setFileMetadata('foo', testMetadata),
Operation.removeFile('foo'),
makeOneFileSnapshot()
)
.expectFiles({})
.expectSymmetry()
})
})

View file

@ -0,0 +1,113 @@
'use strict'
const { expect } = require('chai')
const ot = require('..')
const safePathname = ot.safePathname
describe('safePathname', function () {
function expectClean(input, output) {
// check expected output and also idempotency
const cleanedInput = safePathname.clean(input)
expect(cleanedInput).to.equal(output)
expect(safePathname.clean(cleanedInput)).to.equal(cleanedInput)
expect(safePathname.isClean(cleanedInput)).to.be.true
}
it('cleans pathnames', function () {
// preserve valid pathnames
expectClean('llama.jpg', 'llama.jpg')
expectClean('DSC4056.JPG', 'DSC4056.JPG')
// detects unclean pathnames
expect(safePathname.isClean('rm -rf /')).to.be.false
// replace invalid characters with underscores
expectClean('test-s*\u0001\u0002m\u0007st\u0008.jpg', 'test-s___m_st_.jpg')
// keep slashes, normalize paths, replace ..
expectClean('./foo', 'foo')
expectClean('../foo', '__/foo')
expectClean('foo/./bar', 'foo/bar')
expectClean('foo/../bar', 'bar')
expectClean('../../tricky/foo.bar', '__/__/tricky/foo.bar')
expectClean('foo/../../tricky/foo.bar', '__/tricky/foo.bar')
expectClean('foo/bar/../../tricky/foo.bar', 'tricky/foo.bar')
expectClean('foo/bar/baz/../../tricky/foo.bar', 'foo/tricky/foo.bar')
// remove illegal chars even when there is no extension
expectClean('**foo', '__foo')
// normalise Windows file paths (backslashes become forward slashes)
expectClean('c:\\temp\\foo.txt', 'c:/temp/foo.txt')
// do not allow a leading slash (relative paths only)
expectClean('/foo', '_/foo')
expectClean('//foo', '_/foo')
// do not allow multiple leading slashes
expectClean('//foo', '_/foo')
// do not allow a trailing slash
expectClean('/', '_')
expectClean('foo/', 'foo')
expectClean('foo.tex/', 'foo.tex')
// do not allow multiple trailing slashes
expectClean('//', '_')
expectClean('///', '_')
expectClean('foo//', 'foo')
// file and folder names that consist of . and .. are not OK
expectClean('.', '_')
expectClean('..', '__')
// we do allow names with more dots, e.g. ... and ....
expectClean('...', '...')
expectClean('....', '....')
expectClean('foo/...', 'foo/...')
expectClean('foo/....', 'foo/....')
expectClean('foo/.../bar', 'foo/.../bar')
expectClean('foo/..../bar', 'foo/..../bar')
// leading dots are OK
expectClean('._', '._')
expectClean('.gitignore', '.gitignore')
// trailing dots are not OK on Windows but we allow them
expectClean('_.', '_.')
expectClean('foo/_.', 'foo/_.')
expectClean('foo/_./bar', 'foo/_./bar')
expectClean('foo/_../bar', 'foo/_../bar')
// spaces are allowed
expectClean('a b.png', 'a b.png')
// leading and trailing spaces are not OK
expectClean(' foo', 'foo')
expectClean(' foo', 'foo')
expectClean('foo ', 'foo')
expectClean('foo ', 'foo')
// reserved file names on Windows should not be OK, but we already have
// some in the old system, so have to allow them for now
expectClean('AUX', 'AUX')
expectClean('foo/AUX', 'foo/AUX')
expectClean('AUX/foo', 'AUX/foo')
// multiple dots are OK
expectClean('a.b.png', 'a.b.png')
expectClean('a.code.tex', 'a.code.tex')
// there's no particular reason to allow multiple slashes; sometimes people
// seem to rename files to URLs (https://domain/path) in an attempt to
// upload a file, and this results in an empty directory name
expectClean('foo//bar.png', 'foo/bar.png')
expectClean('foo///bar.png', 'foo/bar.png')
// Check javascript property handling
expectClean('foo/prototype', 'foo/prototype') // OK as part of a pathname
expectClean('prototype/test.txt', 'prototype/test.txt')
expectClean('prototype', '@prototype') // not OK as whole pathname
expectClean('hasOwnProperty', '@hasOwnProperty')
expectClean('__proto__', '@__proto__')
})
})

View file

@ -0,0 +1,92 @@
'use strict'
const { expect } = require('chai')
const {
File,
Snapshot,
TextOperation,
Change,
EditFileOperation,
} = require('..')
describe('Snapshot', function () {
describe('findBlobHashes', function () {
it('finds blob hashes from files', function () {
const snapshot = new Snapshot()
const blobHashes = new Set()
snapshot.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(0)
// Add a file without a hash.
snapshot.addFile('foo', File.fromString(''))
snapshot.findBlobHashes(blobHashes)
expect(blobHashes.size).to.equal(0)
// Add a file with a hash.
snapshot.addFile('bar', File.fromHash(File.EMPTY_FILE_HASH))
snapshot.findBlobHashes(blobHashes)
expect(Array.from(blobHashes)).to.have.members([File.EMPTY_FILE_HASH])
})
})
describe('editFile', function () {
let snapshot
let operation
beforeEach(function () {
snapshot = new Snapshot()
snapshot.addFile('hello.txt', File.fromString('hello'))
operation = new TextOperation()
operation.retain(5)
operation.insert(' world!')
})
it('applies text operations to the file', function () {
snapshot.editFile('hello.txt', operation)
const file = snapshot.getFile('hello.txt')
expect(file.getContent()).to.equal('hello world!')
})
it('rejects text operations for nonexistent file', function () {
expect(() => {
snapshot.editFile('does-not-exist.txt', operation)
}).to.throw(Snapshot.EditMissingFileError)
})
})
describe('applyAll', function () {
let snapshot
let change
beforeEach(function () {
snapshot = new Snapshot()
snapshot.addFile('empty.txt', File.fromString(''))
const badTextOp = new TextOperation()
badTextOp.insert('FAIL!')
const goodTextOp = new TextOperation()
goodTextOp.insert('SUCCESS!')
change = new Change(
[
new EditFileOperation('missing.txt', badTextOp),
new EditFileOperation('empty.txt', goodTextOp),
],
new Date()
)
})
it('ignores recoverable errors', function () {
snapshot.applyAll([change])
const file = snapshot.getFile('empty.txt')
expect(file.getContent()).to.equal('SUCCESS!')
})
it('stops on recoverable errors in strict mode', function () {
expect(() => {
snapshot.applyAll([change], { strict: true })
}).to.throw(Snapshot.EditMissingFileError)
const file = snapshot.getFile('empty.txt')
expect(file.getContent()).to.equal('')
})
})
})

View file

@ -0,0 +1,37 @@
'use strict'
const { expect } = require('chai')
const _ = require('lodash')
const ot = require('..')
const StringFileData = require('../lib/file_data/string_file_data')
const TextOperation = ot.TextOperation
describe('StringFileData', function () {
it('throws when it contains non BMP chars', function () {
const content = '𝌆𝌆𝌆'
const fileData = new StringFileData(content)
const operation = new TextOperation()
operation.insert('aa')
expect(() => {
fileData.edit(operation)
}).to.throw(TextOperation.ApplyError, /string contains non BMP characters/)
})
it('validates string length when edited', function () {
const longString = _.repeat('a', TextOperation.MAX_STRING_LENGTH)
const fileData = new StringFileData(longString)
expect(fileData.getByteLength()).to.equal(longString.length)
expect(fileData.getStringLength()).to.equal(longString.length)
expect(() => {
fileData.edit(new TextOperation().retain(longString.length).insert('x'))
}).to.throw(TextOperation.TooLongError)
expect(fileData.getByteLength()).to.equal(longString.length)
expect(fileData.getStringLength()).to.equal(longString.length)
fileData.edit(new TextOperation().retain(longString.length - 1).remove(1))
expect(fileData.getByteLength()).to.equal(longString.length - 1)
expect(fileData.getStringLength()).to.equal(longString.length - 1)
})
})

View file

@ -0,0 +1,35 @@
/**
* @typedef {import("../..").Blob } Blob
*/
/**
* @template T
* @typedef {import("bluebird")<T>} BPromise
*/
/**
* Fake blob store for tests
*/
class FakeBlobStore {
/**
* Get a string from the blob store
*
* @param {string} hash
* @return {BPromise<string>}
*/
getString(hash) {
throw new Error('Not implemented')
}
/**
* Store a string in the blob store
*
* @param {string} content
* @return {BPromise<Blob>}
*/
putString(content) {
throw new Error('Not implemented')
}
}
module.exports = FakeBlobStore

View file

@ -0,0 +1,38 @@
//
// Randomised testing helpers from OT.js:
// https://github.com/Operational-Transformation/ot.js/blob/
// 8873b7e28e83f9adbf6c3a28ec639c9151a838ae/test/helpers.js
//
'use strict'
function randomInt(n) {
return Math.floor(Math.random() * n)
}
function randomString(n) {
let str = ''
while (n--) {
if (Math.random() < 0.15) {
str += '\n'
} else {
const chr = randomInt(26) + 97
str += String.fromCharCode(chr)
}
}
return str
}
function randomElement(arr) {
return arr[randomInt(arr.length)]
}
function randomTest(numTrials, test) {
return function () {
while (numTrials--) test()
}
}
exports.int = randomInt
exports.string = randomString
exports.element = randomElement
exports.test = randomTest

View file

@ -0,0 +1,269 @@
//
// These tests are based on the OT.js tests:
// https://github.com/Operational-Transformation/ot.js/blob/
// 8873b7e28e83f9adbf6c3a28ec639c9151a838ae/test/lib/test-text-operation.js
//
'use strict'
const { expect } = require('chai')
const random = require('./support/random')
const ot = require('..')
const TextOperation = ot.TextOperation
function randomOperation(str) {
const operation = new TextOperation()
let left
while (true) {
left = str.length - operation.baseLength
if (left === 0) break
const r = Math.random()
const l = 1 + random.int(Math.min(left - 1, 20))
if (r < 0.2) {
operation.insert(random.string(l))
} else if (r < 0.4) {
operation.remove(l)
} else {
operation.retain(l)
}
}
if (Math.random() < 0.3) {
operation.insert(1 + random.string(10))
}
return operation
}
describe('TextOperation', function () {
const numTrials = 500
it('tracks base and target lengths', function () {
const o = new TextOperation()
expect(o.baseLength).to.equal(0)
expect(o.targetLength).to.equal(0)
o.retain(5)
expect(o.baseLength).to.equal(5)
expect(o.targetLength).to.equal(5)
o.insert('abc')
expect(o.baseLength).to.equal(5)
expect(o.targetLength).to.equal(8)
o.retain(2)
expect(o.baseLength).to.equal(7)
expect(o.targetLength).to.equal(10)
o.remove(2)
expect(o.baseLength).to.equal(9)
expect(o.targetLength).to.equal(10)
})
it('supports chaining', function () {
const o = new TextOperation()
.retain(5)
.retain(0)
.insert('lorem')
.insert('')
.remove('abc')
.remove(3)
.remove(0)
.remove('')
expect(o.ops.length).to.equal(3)
})
it('ignores empty operations', function () {
const o = new TextOperation()
o.retain(0)
o.insert('')
o.remove('')
expect(o.ops.length).to.equal(0)
})
it('checks for equality', function () {
const op1 = new TextOperation().remove(1).insert('lo').retain(2).retain(3)
const op2 = new TextOperation().remove(-1).insert('l').insert('o').retain(5)
expect(op1.equals(op2)).to.be.true
op1.remove(1)
op2.retain(1)
expect(op1.equals(op2)).to.be.false
})
it('merges ops', function () {
function last(arr) {
return arr[arr.length - 1]
}
const o = new TextOperation()
expect(o.ops.length).to.equal(0)
o.retain(2)
expect(o.ops.length).to.equal(1)
expect(last(o.ops)).to.equal(2)
o.retain(3)
expect(o.ops.length).to.equal(1)
expect(last(o.ops)).to.equal(5)
o.insert('abc')
expect(o.ops.length).to.equal(2)
expect(last(o.ops)).to.equal('abc')
o.insert('xyz')
expect(o.ops.length).to.equal(2)
expect(last(o.ops)).to.equal('abcxyz')
o.remove('d')
expect(o.ops.length).to.equal(3)
expect(last(o.ops)).to.equal(-1)
o.remove('d')
expect(o.ops.length).to.equal(3)
expect(last(o.ops)).to.equal(-2)
})
it('checks for no-ops', function () {
const o = new TextOperation()
expect(o.isNoop()).to.be.true
o.retain(5)
expect(o.isNoop()).to.be.true
o.retain(3)
expect(o.isNoop()).to.be.true
o.insert('lorem')
expect(o.isNoop()).to.be.false
})
it('converts to string', function () {
const o = new TextOperation()
o.retain(2)
o.insert('lorem')
o.remove('ipsum')
o.retain(5)
expect(o.toString()).to.equal(
"retain 2, insert 'lorem', remove 5, retain 5"
)
})
it('converts from JSON', function () {
const ops = [2, -1, -1, 'cde']
const o = TextOperation.fromJSON(ops)
expect(o.ops.length).to.equal(3)
expect(o.baseLength).to.equal(4)
expect(o.targetLength).to.equal(5)
function assertIncorrectAfter(fn) {
const ops2 = ops.slice(0)
fn(ops2)
expect(() => {
TextOperation.fromJSON(ops2)
}).to.throw()
}
assertIncorrectAfter(ops2 => {
ops2.push({ insert: 'x' })
})
assertIncorrectAfter(ops2 => {
ops2.push(null)
})
})
it(
'applies (randomised)',
random.test(numTrials, () => {
const str = random.string(50)
const o = randomOperation(str)
expect(str.length).to.equal(o.baseLength)
expect(o.apply(str).length).to.equal(o.targetLength)
})
)
it(
'inverts (randomised)',
random.test(numTrials, () => {
const str = random.string(50)
const o = randomOperation(str)
const p = o.invert(str)
expect(o.baseLength).to.equal(p.targetLength)
expect(o.targetLength).to.equal(p.baseLength)
expect(p.apply(o.apply(str))).to.equal(str)
})
)
it(
'converts to/from JSON (randomised)',
random.test(numTrials, () => {
const doc = random.string(50)
const operation = randomOperation(doc)
const roundTripOperation = TextOperation.fromJSON(operation.toJSON())
expect(operation.equals(roundTripOperation)).to.be.true
})
)
it(
'composes (randomised)',
random.test(numTrials, () => {
// invariant: apply(str, compose(a, b)) === apply(apply(str, a), b)
const str = random.string(20)
const a = randomOperation(str)
const afterA = a.apply(str)
expect(afterA.length).to.equal(a.targetLength)
const b = randomOperation(afterA)
const afterB = b.apply(afterA)
expect(afterB.length).to.equal(b.targetLength)
const ab = a.compose(b)
expect(ab.targetLength).to.equal(b.targetLength)
const afterAB = ab.apply(str)
expect(afterAB).to.equal(afterB)
})
)
it(
'transforms (randomised)',
random.test(numTrials, () => {
// invariant: compose(a, b') = compose(b, a')
// where (a', b') = transform(a, b)
const str = random.string(20)
const a = randomOperation(str)
const b = randomOperation(str)
const primes = TextOperation.transform(a, b)
const aPrime = primes[0]
const bPrime = primes[1]
const abPrime = a.compose(bPrime)
const baPrime = b.compose(aPrime)
const afterAbPrime = abPrime.apply(str)
const afterBaPrime = baPrime.apply(str)
expect(abPrime.equals(baPrime)).to.be.true
expect(afterAbPrime).to.equal(afterBaPrime)
})
)
it('throws when invalid operations are applied', function () {
const operation = new TextOperation().retain(1)
expect(() => {
operation.apply('')
}).to.throw(TextOperation.ApplyError)
expect(() => {
operation.apply(' ')
}).not.to.throw()
})
it('throws when insert text contains non BMP chars', function () {
const operation = new TextOperation()
const str = '𝌆\n'
expect(() => {
operation.insert(str)
}).to.throw(
TextOperation.UnprocessableError,
/inserted text contains non BMP characters/
)
})
it('throws when base string contains non BMP chars', function () {
const operation = new TextOperation()
const str = '𝌆\n'
expect(() => {
operation.apply(str)
}).to.throw(
TextOperation.UnprocessableError,
/string contains non BMP characters/
)
})
it('throws at from JSON when it contains non BMP chars', function () {
const operation = ['𝌆\n']
expect(() => {
TextOperation.fromJSON(operation)
}).to.throw(
TextOperation.UnprocessableError,
/inserted text contains non BMP characters/
)
})
})

View file

@ -0,0 +1,14 @@
{
"compilerOptions": {
"allowJs": true,
"allowSyntheticDefaultImports": true,
"checkJs": true,
"esModuleInterop": true,
"lib": ["es2018"],
"module": "commonjs",
"noEmit": true,
"resolveJsonModule": true,
"skipLibCheck": true
},
"include": ["lib/**/*", "typings/**/*"]
}

services/history-v1/.gitignore
View file

@ -0,0 +1,3 @@
# managed by monorepo$ bin/update_build_scripts
.npmrc

View file

@ -0,0 +1,3 @@
{
"require": "test/setup.js"
}

View file

@ -0,0 +1 @@
16.17.1

View file

@ -0,0 +1,26 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
FROM node:16.17.1 as base
WORKDIR /overleaf/services/history-v1
# Google Cloud Storage needs a writable $HOME/.config for resumable uploads
# (see https://googleapis.dev/nodejs/storage/latest/File.html#createWriteStream)
RUN mkdir /home/node/.config && chown node:node /home/node/.config
FROM base as app
COPY package.json package-lock.json /overleaf/
COPY services/history-v1/package.json /overleaf/services/history-v1/
COPY libraries/ /overleaf/libraries/
RUN cd /overleaf && npm ci --quiet
COPY services/history-v1/ /overleaf/services/history-v1/
FROM app
USER node
CMD ["node", "--expose-gc", "app.js"]

View file

@ -0,0 +1,103 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
BUILD_NUMBER ?= local
BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
PROJECT_NAME = history-v1
BUILD_DIR_NAME = $(shell pwd | xargs basename | tr -cd '[a-zA-Z0-9_.\-]')
DOCKER_COMPOSE_FLAGS ?= -f docker-compose.yml
DOCKER_COMPOSE := BUILD_NUMBER=$(BUILD_NUMBER) \
BRANCH_NAME=$(BRANCH_NAME) \
PROJECT_NAME=$(PROJECT_NAME) \
MOCHA_GREP=${MOCHA_GREP} \
docker-compose ${DOCKER_COMPOSE_FLAGS}
DOCKER_COMPOSE_TEST_ACCEPTANCE = \
COMPOSE_PROJECT_NAME=test_acceptance_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE)
DOCKER_COMPOSE_TEST_UNIT = \
COMPOSE_PROJECT_NAME=test_unit_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE)
clean:
-docker rmi ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-docker rmi gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-$(DOCKER_COMPOSE_TEST_UNIT) down --rmi local
-$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down --rmi local
HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:16.17.1 npm run --silent
format:
$(RUN_LINTING) format
format_fix:
$(RUN_LINTING) format:fix
lint:
$(RUN_LINTING) lint
lint_fix:
$(RUN_LINTING) lint:fix
test: format lint test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) run --rm test_unit
$(MAKE) test_unit_clean
endif
test_clean: test_unit_clean
test_unit_clean:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) down -v -t 0
endif
test_acceptance: test_acceptance_clean test_acceptance_pre_run test_acceptance_run
$(MAKE) test_acceptance_clean
test_acceptance_debug: test_acceptance_clean test_acceptance_pre_run test_acceptance_run_debug
$(MAKE) test_acceptance_clean
test_acceptance_run:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance
endif
test_acceptance_run_debug:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run -p 127.0.0.9:19999:19999 --rm test_acceptance npm run test:acceptance -- --inspect=0.0.0.0:19999 --inspect-brk
endif
test_clean: test_acceptance_clean
test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
benchmarks:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance npm run benchmarks
build:
docker build --pull --tag ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--tag gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--file Dockerfile \
../..
tar:
$(DOCKER_COMPOSE) up tar
publish:
docker push $(DOCKER_REPO)/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
.PHONY: clean test test_unit test_acceptance test_clean benchmarks build publish

View file

@ -0,0 +1,51 @@
## Database migrations
The history service uses knex to manage PostgreSQL migrations.
To create a new migration, run:
```
npx knex migrate:make migration_name
```
To apply migrations, run:
```
npx knex migrate:latest
```
For more information, consult the [knex migrations
guide](https://knexjs.org/guide/migrations.html#migration-cli).
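A migration file exports `up` and `down` functions that receive a knex instance. The sketch below only illustrates that shape; the table and column names are hypothetical and not part of this service's schema.
```
// Illustrative only: hypothetical table and columns, not this service's schema.
exports.up = async function (knex) {
  await knex.schema.createTable('example_things', table => {
    table.increments('id')
    table.string('name').notNullable()
    table.timestamps(true, true)
  })
}

exports.down = async function (knex) {
  await knex.schema.dropTableIfExists('example_things')
}
```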
## Global blobs
Global blobs are blobs that are shared between projects. The list of global
blobs is stored in the projectHistoryGlobalBlobs Mongo collection and is read
when the service starts. Changing the list of global blobs needs to be done
carefully.
### Adding a blob to the global blobs list
If we identify a blob that appears in many projects, we might want to move that
blob to the global blobs list.
1. Add a record for the blob to the projectHistoryGlobalBlobs collection (see the sketch after this list).
2. Restart the history service.
3. Delete any corresponding project blobs.
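For step 1, the record can be added from the Mongo shell along the lines of the sketch below. Treat this as an assumption-laden example: apart from the collection name, the field layout (`_id` as the blob hash, `byteLength`, `stringLength`) is a guess at the schema and should be checked against the blob store code before use.
```
// Illustrative only: verify field names against the blob store before running.
db.projectHistoryGlobalBlobs.insertOne({
  _id: 'aad321caf77ca6c5ab09e6e8cbb516ca3b9a6632', // hypothetical blob hash
  byteLength: 10240, // size of the stored blob in bytes
  stringLength: null, // null for binary blobs
})
```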
### Removing a blob from the global blobs list
Removing a blob from the global blobs list is trickier. As soon as the global
blob is made unavailable, every project that needs the blob will have to get
its own copy. To avoid disruptions, follow these steps:
1. In the projectHistoryGlobalBlobs collection, set the `demoted` property to
`true` on the global blob to remove (see the sketch after this list). This
makes the history system write new instances of this blob to project blobs,
but still read from the global blob.
2. Restart the history service.
3. Copy the blob to all projects that need it.
4. Remove the blob from the projectHistoryGlobalBlobs collection.
5. Restart the history service.
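A sketch of the demotion in step 1, assuming the `demoted` flag behaves as described above; the hash is hypothetical.
```
// Illustrative only: demote the global blob so projects write their own copies.
db.projectHistoryGlobalBlobs.updateOne(
  { _id: 'aad321caf77ca6c5ab09e6e8cbb516ca3b9a6632' }, // hypothetical blob hash
  { $set: { demoted: true } }
)
```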

View file

@ -0,0 +1,147 @@
'use strict'
const basicAuth = require('basic-auth')
const config = require('config')
const HTTPStatus = require('http-status')
const jwt = require('jsonwebtoken')
const tsscmp = require('tsscmp')
function setupBasicHttpAuthForSwaggerDocs(app) {
app.use('/docs', function (req, res, next) {
if (hasValidBasicAuthCredentials(req)) {
return next()
}
res.header('WWW-Authenticate', 'Basic realm="Application"')
res.status(HTTPStatus.UNAUTHORIZED).end()
})
}
exports.setupBasicHttpAuthForSwaggerDocs = setupBasicHttpAuthForSwaggerDocs
function hasValidBasicAuthCredentials(req) {
const credentials = basicAuth(req)
if (!credentials) return false
// No security in the name, so just use straight comparison.
if (credentials.name !== 'staging') return false
const password = config.get('basicHttpAuth.password')
if (password && tsscmp(credentials.pass, password)) return true
// Support an old password so we can change the password without downtime.
if (config.has('basicHttpAuth.oldPassword')) {
const oldPassword = config.get('basicHttpAuth.oldPassword')
if (oldPassword && tsscmp(credentials.pass, oldPassword)) return true
}
return false
}
function setupSSL(app) {
const httpsOnly = config.get('httpsOnly') === 'true'
if (!httpsOnly) {
return
}
app.enable('trust proxy')
app.use(function (req, res, next) {
if (req.protocol === 'https') {
next()
return
}
if (req.method === 'GET' || req.method === 'HEAD') {
res.redirect('https://' + req.headers.host + req.url)
} else {
res
.status(HTTPStatus.FORBIDDEN)
.send('Please use HTTPS when submitting data to this server.')
}
})
}
exports.setupSSL = setupSSL
function handleJWTAuth(req, authOrSecDef, scopesOrApiKey, next) {
// as a temporary solution, to make the OT demo still work
// this handler will also check for basic authorization
if (hasValidBasicAuthCredentials(req)) {
return next()
}
let token, err
if (authOrSecDef.name === 'token') {
token = req.query.token
} else if (
req.headers.authorization &&
req.headers.authorization.split(' ')[0] === 'Bearer'
) {
token = req.headers.authorization.split(' ')[1]
}
if (!token) {
err = new Error('jwt missing')
err.statusCode = HTTPStatus.UNAUTHORIZED
err.headers = { 'WWW-Authenticate': 'Bearer' }
return next(err)
}
let decoded
try {
decoded = decodeJWT(token)
} catch (error) {
if (
error instanceof jwt.JsonWebTokenError ||
error instanceof jwt.TokenExpiredError
) {
err = new Error(error.message)
err.statusCode = HTTPStatus.UNAUTHORIZED
err.headers = { 'WWW-Authenticate': 'Bearer error="invalid_token"' }
return next(err)
}
throw error
}
if (decoded.project_id.toString() !== req.swagger.params.project_id.value) {
err = new Error('Wrong project_id')
err.statusCode = HTTPStatus.FORBIDDEN
return next(err)
}
next()
}
/**
* Verify and decode the given JSON Web Token
*/
function decodeJWT(token) {
const key = config.get('jwtAuth.key')
const algorithm = config.get('jwtAuth.algorithm')
try {
return jwt.verify(token, key, { algorithms: [algorithm] })
} catch (err) {
// Support an old key so we can change the key without downtime.
if (config.has('jwtAuth.oldKey')) {
const oldKey = config.get('jwtAuth.oldKey')
return jwt.verify(token, oldKey, { algorithms: [algorithm] })
} else {
throw err
}
}
}
function handleBasicAuth(req, authOrSecDef, scopesOrApiKey, next) {
if (hasValidBasicAuthCredentials(req)) {
return next()
}
const error = new Error()
error.statusCode = HTTPStatus.UNAUTHORIZED
error.headers = { 'WWW-Authenticate': 'Basic realm="Application"' }
return next(error)
}
function getSwaggerHandlers() {
const handlers = {}
if (!config.has('jwtAuth.key') || !config.has('basicHttpAuth.password')) {
throw new Error('missing authentication env vars')
}
handlers.jwt = handleJWTAuth
handlers.basic = handleBasicAuth
handlers.token = handleJWTAuth
return handlers
}
exports.getSwaggerHandlers = getSwaggerHandlers

View file

@ -0,0 +1,10 @@
/**
* Turn an async function into an Express middleware
*/
function expressify(fn) {
return (req, res, next) => {
fn(req, res, next).catch(next)
}
}
module.exports = expressify
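// Usage sketch (illustrative only): wrap an async handler so that a rejected
// promise is passed to Express error handling instead of being swallowed.
//
//   const expressify = require('./expressify')
//   // someAsyncLookup is a hypothetical helper, not part of this codebase
//   app.get('/things/:id', expressify(async (req, res) => {
//     const thing = await someAsyncLookup(req.params.id)
//     res.json(thing)
//   }))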

View file

@ -0,0 +1,23 @@
const logger = require('@overleaf/logger')
const expressify = require('./expressify')
const { mongodb } = require('../../storage')
async function status(req, res) {
try {
await mongodb.db.command({ ping: 1 })
} catch (err) {
logger.warn({ err }, 'Lost connection with MongoDB')
res.status(500).send('Lost connection with MongoDB')
return
}
res.send('history-v1 is up')
}
function healthCheck(req, res) {
res.send('OK')
}
module.exports = {
status: expressify(status),
healthCheck,
}

View file

@ -0,0 +1,140 @@
'use strict'
const BPromise = require('bluebird')
const HTTPStatus = require('http-status')
const core = require('overleaf-editor-core')
const Change = core.Change
const Chunk = core.Chunk
const File = core.File
const FileMap = core.FileMap
const Snapshot = core.Snapshot
const TextOperation = core.TextOperation
const logger = require('@overleaf/logger')
const storage = require('../../storage')
const BatchBlobStore = storage.BatchBlobStore
const BlobStore = storage.BlobStore
const chunkStore = storage.chunkStore
const HashCheckBlobStore = storage.HashCheckBlobStore
const persistChanges = storage.persistChanges
const render = require('./render')
exports.importSnapshot = function importSnapshot(req, res, next) {
const projectId = req.swagger.params.project_id.value
const rawSnapshot = req.swagger.params.snapshot.value
let snapshot
try {
snapshot = Snapshot.fromRaw(rawSnapshot)
} catch (err) {
return render.unprocessableEntity(res)
}
return chunkStore
.initializeProject(projectId, snapshot)
.then(function (projectId) {
res.status(HTTPStatus.OK).json({ projectId })
})
.catch(err => {
if (err instanceof chunkStore.AlreadyInitialized) {
render.conflict(res)
} else {
next(err)
}
})
}
exports.importChanges = function importChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const rawChanges = req.swagger.params.changes.value
const endVersion = req.swagger.params.end_version.value
const returnSnapshot = req.swagger.params.return_snapshot.value || 'none'
let changes
try {
changes = rawChanges.map(Change.fromRaw)
} catch (err) {
logger.error(err)
return render.unprocessableEntity(res)
}
// Set limits to force us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
maxChanges: 0,
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const hashCheckBlobStore = new HashCheckBlobStore(blobStore)
function loadFiles() {
const blobHashes = new Set()
changes.forEach(function findBlobHashesToPreload(change) {
change.findBlobHashes(blobHashes)
})
function lazyLoadChangeFiles(change) {
return change.loadFiles('lazy', batchBlobStore)
}
return batchBlobStore
.preload(Array.from(blobHashes))
.then(function lazyLoadChangeFilesWithBatching() {
return BPromise.each(changes, lazyLoadChangeFiles)
})
}
function buildResultSnapshot(resultChunk) {
return BPromise.resolve(
resultChunk || chunkStore.loadLatest(projectId)
).then(function (chunk) {
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
return snapshot.store(hashCheckBlobStore)
})
}
return loadFiles()
.then(function () {
return persistChanges(projectId, changes, limits, endVersion)
})
.then(function (result) {
if (returnSnapshot === 'none') {
res.status(HTTPStatus.CREATED).json({})
} else {
return buildResultSnapshot(result && result.currentChunk).then(
function (rawSnapshot) {
res.status(HTTPStatus.CREATED).json(rawSnapshot)
}
)
}
})
.catch(err => {
if (
err instanceof Chunk.ConflictingEndVersion ||
err instanceof TextOperation.UnprocessableError ||
err instanceof File.NotEditableError ||
err instanceof FileMap.PathnameError ||
err instanceof Snapshot.EditMissingFileError ||
err instanceof chunkStore.ChunkVersionConflictError
) {
// If we failed to apply operations, that's probably because they were
// invalid.
logger.error(err)
render.unprocessableEntity(res)
} else if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
next(err)
}
})
}

View file

@ -0,0 +1,235 @@
'use strict'
const _ = require('lodash')
const Path = require('path')
const Stream = require('stream')
const HTTPStatus = require('http-status')
const fs = require('fs')
const { promisify } = require('util')
const config = require('config')
const logger = require('@overleaf/logger')
const { Chunk, ChunkResponse, Blob } = require('overleaf-editor-core')
const {
BlobStore,
blobHash,
chunkStore,
HashCheckBlobStore,
ProjectArchive,
zipStore,
} = require('../../storage')
const render = require('./render')
const expressify = require('./expressify')
const withTmpDir = require('./with_tmp_dir')
const StreamSizeLimit = require('./stream_size_limit')
const pipeline = promisify(Stream.pipeline)
async function initializeProject(req, res, next) {
let projectId = req.swagger.params.body.value.projectId
try {
projectId = await chunkStore.initializeProject(projectId)
res.status(HTTPStatus.OK).json({ projectId })
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
render.conflict(res)
} else {
throw err
}
}
}
async function getLatestContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
const chunk = await chunkStore.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
res.json(snapshot.toRaw())
}
async function getLatestHashedContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new HashCheckBlobStore(new BlobStore(projectId))
const chunk = await chunkStore.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
const rawSnapshot = await snapshot.store(blobStore)
res.json(rawSnapshot)
}
async function getLatestHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
try {
const chunk = await chunkStore.loadLatest(projectId)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistoryBefore(req, res, next) {
const projectId = req.swagger.params.project_id.value
const timestamp = req.swagger.params.timestamp.value
try {
const chunk = await chunkStore.loadAtTimestamp(projectId, timestamp)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
const blobStore = new BlobStore(projectId)
let snapshot
try {
snapshot = await getSnapshotAtVersion(projectId, version)
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
return render.notFound(res)
} else {
throw err
}
}
await withTmpDir('get-zip-', async tmpDir => {
const tmpFilename = Path.join(tmpDir, 'project.zip')
const archive = new ProjectArchive(snapshot)
await archive.writeZip(blobStore, tmpFilename)
res.set('Content-Type', 'application/octet-stream')
res.set('Content-Disposition', 'attachment; filename=project.zip')
const stream = fs.createReadStream(tmpFilename)
await pipeline(stream, res)
})
}
async function createZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const snapshot = await getSnapshotAtVersion(projectId, version)
const zipUrl = await zipStore.getSignedUrl(projectId, version)
// Do not await this; run it in the background.
zipStore.storeZip(projectId, version, snapshot).catch(err => {
logger.error({ err, projectId, version }, 'createZip: storeZip failed')
})
res.status(HTTPStatus.OK).json({ zipUrl })
} catch (error) {
if (error instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
next(error)
}
}
}
async function deleteProject(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
await Promise.all([
chunkStore.deleteProjectChunks(projectId),
blobStore.deleteBlobs(),
])
res.status(HTTPStatus.NO_CONTENT).send()
}
async function createProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const expectedHash = req.swagger.params.hash.value
const maxUploadSize = parseInt(config.get('maxFileUploadSize'), 10)
await withTmpDir('blob-', async tmpDir => {
const tmpPath = Path.join(tmpDir, 'content')
const sizeLimit = new StreamSizeLimit(maxUploadSize)
await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
if (sizeLimit.sizeLimitExceeded) {
return render.requestEntityTooLarge(res)
}
const hash = await blobHash.fromFile(tmpPath)
if (hash !== expectedHash) {
logger.debug({ hash, expectedHash }, 'Hash mismatch')
return render.conflict(res, 'File hash mismatch')
}
const blobStore = new BlobStore(projectId)
await blobStore.putFile(tmpPath)
res.status(HTTPStatus.CREATED).end()
})
}
async function getProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const hash = req.swagger.params.hash.value
const blobStore = new BlobStore(projectId)
let stream
try {
stream = await blobStore.getStream(hash)
} catch (err) {
if (err instanceof Blob.NotFoundError) {
return render.notFound(res)
} else {
throw err
}
}
res.set('Content-Type', 'application/octet-stream')
await pipeline(stream, res)
}
async function getSnapshotAtVersion(projectId, version) {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const snapshot = chunk.getSnapshot()
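// The chunk's snapshot corresponds to its start version; dropping the last
// (endVersion - version) changes and applying the rest reproduces the project
// state at the requested version.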
const changes = _.dropRight(
chunk.getChanges(),
chunk.getEndVersion() - version
)
snapshot.applyAll(changes)
return snapshot
}
module.exports = {
initializeProject: expressify(initializeProject),
getLatestContent: expressify(getLatestContent),
getLatestHashedContent: expressify(getLatestHashedContent),
getLatestPersistedHistory: expressify(getLatestHistory),
getLatestHistory: expressify(getLatestHistory),
getHistory: expressify(getHistory),
getHistoryBefore: expressify(getHistoryBefore),
getZip: expressify(getZip),
createZip: expressify(createZip),
deleteProject: expressify(deleteProject),
createProjectBlob: expressify(createProjectBlob),
getProjectBlob: expressify(getProjectBlob),
}

View file

@ -0,0 +1,17 @@
'use strict'
const HTTPStatus = require('http-status')
function makeErrorRenderer(status) {
return (res, message) => {
res.status(status).json({ message: message || HTTPStatus[status] })
}
}
module.exports = {
badRequest: makeErrorRenderer(HTTPStatus.BAD_REQUEST),
notFound: makeErrorRenderer(HTTPStatus.NOT_FOUND),
unprocessableEntity: makeErrorRenderer(HTTPStatus.UNPROCESSABLE_ENTITY),
conflict: makeErrorRenderer(HTTPStatus.CONFLICT),
requestEntityTooLarge: makeErrorRenderer(HTTPStatus.REQUEST_ENTITY_TOO_LARGE),
}
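// Usage sketch (illustrative only): each renderer sends a JSON error body,
// falling back to the standard reason phrase when no message is given, e.g.
//
//   render.notFound(res) // 404 { "message": "Not Found" }
//   render.conflict(res, 'File hash mismatch') // 409 { "message": "File hash mismatch" }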

View file

@ -0,0 +1,26 @@
const stream = require('stream')
/**
* Transform stream that stops passing bytes through after some threshold has
* been reached.
*/
class StreamSizeLimit extends stream.Transform {
constructor(maxSize) {
super()
this.maxSize = maxSize
this.accumulatedSize = 0
this.sizeLimitExceeded = false
}
_transform(chunk, encoding, cb) {
this.accumulatedSize += chunk.length
if (this.accumulatedSize > this.maxSize) {
this.sizeLimitExceeded = true
} else {
this.push(chunk)
}
cb()
}
}
module.exports = StreamSizeLimit
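// Usage sketch (illustrative only), mirroring the blob upload handler: pipe
// the request through the limiter, then check the flag before accepting.
//
//   const sizeLimit = new StreamSizeLimit(maxUploadSize)
//   await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
//   if (sizeLimit.sizeLimitExceeded) {
//     // respond with 413 Request Entity Too Large
//   }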

View file

@ -0,0 +1,27 @@
const fs = require('fs')
const fsExtra = require('fs-extra')
const logger = require('@overleaf/logger')
const os = require('os')
const path = require('path')
/**
* Create a temporary directory before executing a function and cleaning up
* after.
*
* @param {string} prefix - prefix for the temporary directory name
* @param {Function} fn - async function to call
*/
async function withTmpDir(prefix, fn) {
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), prefix))
try {
await fn(tmpDir)
} finally {
fsExtra.remove(tmpDir).catch(err => {
if (err.code !== 'ENOENT') {
logger.error({ err }, 'failed to delete temporary directory')
}
})
}
}
module.exports = withTmpDir
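// Usage sketch (illustrative only), as in the zip download handler: the
// temporary directory exists only for the duration of the callback.
//
//   await withTmpDir('get-zip-', async tmpDir => {
//     const tmpFilename = path.join(tmpDir, 'project.zip')
//     // ... write the archive and stream it to the response ...
//   })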

View file

@ -0,0 +1,256 @@
'use strict'
const _ = require('lodash')
const paths = _.reduce(
[require('./projects').paths, require('./project_import').paths],
_.extend
)
const securityDefinitions = require('./security_definitions')
module.exports = {
swagger: '2.0',
info: {
title: 'Overleaf Editor API',
description: 'API for the Overleaf editor.',
version: '1.0',
},
produces: ['application/json'],
basePath: '/api',
paths,
securityDefinitions,
security: [
{
jwt: [],
},
],
definitions: {
Project: {
properties: {
projectId: {
type: 'string',
},
},
required: ['projectId'],
},
File: {
properties: {
hash: {
type: 'string',
},
byteLength: {
type: 'integer',
},
stringLength: {
type: 'integer',
},
},
},
Label: {
properties: {
authorId: {
type: 'integer',
},
text: {
type: 'string',
},
timestamp: {
type: 'string',
},
version: {
type: 'integer',
},
},
},
Chunk: {
properties: {
history: {
$ref: '#/definitions/History',
},
startVersion: {
type: 'number',
},
},
},
ChunkResponse: {
properties: {
chunk: {
$ref: '#/definitions/Chunk',
},
authors: {
type: 'array',
items: {
$ref: '#/definitions/Author',
},
},
},
},
History: {
properties: {
snapshot: {
$ref: '#/definitions/Snapshot',
},
changes: {
type: 'array',
items: {
$ref: '#/definitions/Change',
},
},
},
},
Snapshot: {
properties: {
files: {
type: 'object',
additionalProperties: {
$ref: '#/definitions/File',
},
},
},
required: ['files'],
},
Change: {
properties: {
timestamp: {
type: 'string',
},
operations: {
type: 'array',
items: {
$ref: '#/definitions/Operation',
},
},
authors: {
type: 'array',
items: {
type: ['integer', 'null'],
},
},
v2Authors: {
type: 'array',
items: {
type: ['string', 'null'],
},
},
projectVersion: {
type: 'string',
},
v2DocVersions: {
type: 'object',
additionalProperties: {
$ref: '#/definitions/V2DocVersions',
},
},
},
required: ['timestamp', 'operations'],
},
V2DocVersions: {
properties: {
pathname: {
type: 'string',
},
v: {
type: 'integer',
},
},
},
ChangeRequest: {
properties: {
baseVersion: {
type: 'integer',
},
untransformable: {
type: 'boolean',
},
operations: {
type: 'array',
items: {
$ref: '#/definitions/Operation',
},
},
authors: {
type: 'array',
items: {
type: ['integer', 'null'],
},
},
},
required: ['baseVersion', 'operations'],
},
ChangeNote: {
properties: {
baseVersion: {
type: 'integer',
},
change: {
$ref: '#/definitions/Change',
},
},
required: ['baseVersion'],
},
Operation: {
properties: {
pathname: {
type: 'string',
},
newPathname: {
type: 'string',
},
blob: {
$ref: '#/definitions/Blob',
},
textOperation: {
type: 'array',
items: {},
},
file: {
$ref: '#/definitions/File',
},
},
},
Error: {
properties: {
message: {
type: 'string',
},
},
required: ['message'],
},
Blob: {
properties: {
hash: {
type: 'string',
},
},
required: ['hash'],
},
Author: {
properties: {
id: {
type: 'integer',
},
email: {
type: 'string',
},
name: {
type: 'string',
},
},
required: ['id', 'email', 'name'],
},
SyncState: {
properties: {
synced: {
type: 'boolean',
},
},
},
ZipInfo: {
properties: {
zipUrl: {
type: 'string',
},
},
required: ['zipUrl'],
},
},
}

View file

@ -0,0 +1,108 @@
'use strict'
const importSnapshot = {
'x-swagger-router-controller': 'project_import',
operationId: 'importSnapshot',
tags: ['ProjectImport'],
description: 'Import a snapshot from the current Rails app.',
consumes: ['application/json'],
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'snapshot',
in: 'body',
description: 'Snapshot to import.',
required: true,
schema: {
$ref: '#/definitions/Snapshot',
},
},
],
responses: {
200: {
description: 'Imported',
},
409: {
description: 'Conflict: project already initialized',
},
404: {
description: 'No such project exists',
},
},
security: [
{
basic: [],
},
],
}
const importChanges = {
'x-swagger-router-controller': 'project_import',
operationId: 'importChanges',
tags: ['ProjectImport'],
description: 'Import changes for a project from the current Rails app.',
consumes: ['application/json'],
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'end_version',
description: 'end_version of latest persisted chunk',
in: 'query',
required: true,
type: 'number',
},
{
name: 'return_snapshot',
description:
'optionally, return a snapshot with the latest hashed content',
in: 'query',
required: false,
type: 'string',
enum: ['hashed', 'none'],
},
{
name: 'changes',
in: 'body',
description: 'changes to be imported',
required: true,
schema: {
type: 'array',
items: {
$ref: '#/definitions/Change',
},
},
},
],
responses: {
201: {
description: 'Created',
schema: {
$ref: '#/definitions/Snapshot',
},
},
},
security: [
{
basic: [],
},
],
}
exports.paths = {
'/projects/{project_id}/import': { post: importSnapshot },
'/projects/{project_id}/legacy_import': { post: importSnapshot },
'/projects/{project_id}/changes': { post: importChanges },
'/projects/{project_id}/legacy_changes': { post: importChanges },
}

View file

@ -0,0 +1,429 @@
'use strict'
const Blob = require('overleaf-editor-core').Blob
exports.paths = {
'/projects': {
post: {
'x-swagger-router-controller': 'projects',
operationId: 'initializeProject',
tags: ['Project'],
description: 'Initialize project.',
parameters: [
{
name: 'body',
in: 'body',
schema: {
type: 'object',
properties: {
projectId: { type: 'string' },
},
},
},
],
responses: {
200: {
description: 'Initialized',
schema: {
$ref: '#/definitions/Project',
},
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}': {
delete: {
'x-swagger-router-controller': 'projects',
operationId: 'deleteProject',
tags: ['Project'],
description: "Delete a project's history",
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
204: {
description: 'Success',
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}/blobs/{hash}': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getProjectBlob',
tags: ['Project'],
description: 'Fetch blob content by its project id and hash.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
],
produces: ['application/octet-stream'],
responses: {
200: {
description: 'Success',
schema: {
type: 'file',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
security: [{ jwt: [] }, { token: [] }],
},
put: {
'x-swagger-router-controller': 'projects',
operationId: 'createProjectBlob',
tags: ['Project'],
description:
'Create blob to be used in a file addition operation when importing a' +
' snapshot or changes',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
],
responses: {
201: {
description: 'Created',
},
},
},
},
'/projects/{project_id}/latest/content': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestContent',
tags: ['Project'],
description:
'Get full content of the latest version. Text file ' +
'content is included, but binary files are just linked by hash.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/Snapshot',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/latest/hashed_content': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestHashedContent',
tags: ['Project'],
description:
'Get a snapshot of a project at the latest version ' +
'with the hashes for the contents of each file',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/Snapshot',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}/latest/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestHistory',
tags: ['Project'],
description:
'Get the latest sequence of changes.' +
' TODO probably want a configurable depth.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/latest/persistedHistory': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestPersistedHistory',
tags: ['Project'],
description: 'Get the latest sequence of changes.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/versions/{version}/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getHistory',
tags: ['Project'],
description:
'Get the sequence of changes that includes the given version.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/timestamp/{timestamp}/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getHistoryBefore',
tags: ['Project'],
description:
'Get the sequence of changes before the given timestamp.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'timestamp',
in: 'path',
description: 'timestamp',
required: true,
type: 'string',
format: 'date-time',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/version/{version}/zip': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getZip',
tags: ['Project'],
description: 'Download zip with project content',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
produces: ['application/octet-stream'],
responses: {
200: {
description: 'success',
},
404: {
description: 'not found',
},
},
security: [
{
token: [],
},
],
},
post: {
'x-swagger-router-controller': 'projects',
operationId: 'createZip',
tags: ['Project'],
description:
'Create a zip file with project content. Returns a link to be polled.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
responses: {
200: {
description: 'success',
schema: {
$ref: '#/definitions/ZipInfo',
},
},
404: {
description: 'not found',
},
},
security: [
{
basic: [],
},
],
},
},
}

View file

@ -0,0 +1,17 @@
'use strict'
module.exports = {
jwt: {
type: 'apiKey',
in: 'header',
name: 'authorization',
},
basic: {
type: 'basic',
},
token: {
type: 'apiKey',
in: 'query',
name: 'token',
},
}

services/history-v1/app.js
View file

@ -0,0 +1,169 @@
'use strict'
/* eslint-disable no-console */
// Initialize metrics as early as possible because this is where the Google
// profiling agents are also started.
const Metrics = require('@overleaf/metrics')
Metrics.initialize('history-v1')
const BPromise = require('bluebird')
const express = require('express')
const cors = require('cors')
const helmet = require('helmet')
const HTTPStatus = require('http-status')
const logger = require('@overleaf/logger')
const cookieParser = require('cookie-parser')
const bodyParser = require('body-parser')
const swaggerTools = require('swagger-tools')
const swaggerDoc = require('./api/swagger')
const security = require('./api/app/security')
const healthChecks = require('./api/controllers/health_checks')
const { mongodb, loadGlobalBlobs } = require('./storage')
const app = express()
module.exports = app
logger.initialize('history-v1')
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
// We may have fairly large JSON bodies when receiving large Changes. Clients
// may have to handle 413 status codes and try creating files instead of sending
// text content in changes.
app.use(bodyParser.json({ limit: '4MB' }))
app.use(
bodyParser.urlencoded({
extended: false,
})
)
app.use(cookieParser())
app.use(cors())
security.setupSSL(app)
security.setupBasicHttpAuthForSwaggerDocs(app)
app.use(function (req, res, next) {
// use a 5 minute timeout on all responses
res.setTimeout(5 * 60 * 1000)
next()
})
app.get('/', function (req, res) {
res.send('')
})
app.get('/status', healthChecks.status)
app.get('/health_check', healthChecks.healthCheck)
function setupSwagger() {
return new BPromise(function (resolve) {
swaggerTools.initializeMiddleware(swaggerDoc, function (middleware) {
app.use(middleware.swaggerMetadata())
app.use(middleware.swaggerSecurity(security.getSwaggerHandlers()))
app.use(middleware.swaggerValidator())
app.use(
middleware.swaggerRouter({
controllers: './api/controllers',
useStubs: app.get('env') === 'development',
})
)
app.use(middleware.swaggerUi())
resolve()
})
})
}
function setupErrorHandling() {
app.use(function (req, res, next) {
const err = new Error('Not Found')
err.status = HTTPStatus.NOT_FOUND
return next(err)
})
// Handle Swagger errors.
app.use(function (err, req, res, next) {
if (res.headersSent) {
return next(err)
}
if (err.code === 'SCHEMA_VALIDATION_FAILED') {
logger.error(err)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json(err.results)
}
if (err.code === 'INVALID_TYPE' || err.code === 'PATTERN') {
logger.error(err)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid type: ' + err.paramName,
})
}
if (err.code === 'ENUM_MISMATCH') {
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid enum value: ' + err.paramName,
})
}
if (err.code === 'REQUIRED') {
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: err.message,
})
}
next(err)
})
app.use(function (err, req, res, next) {
logger.error(err)
if (res.headersSent) {
return next(err)
}
// Handle errors that specify a statusCode. Some come from our code. Some
// bubble up from AWS SDK, but they sometimes have the statusCode set to
// 200, notably some InternalErrors and TimeoutErrors, so we have to guard
// against that. We also check `status`, but `statusCode` is preferred.
const statusCode = err.statusCode || err.status
if (statusCode && statusCode >= 400 && statusCode < 600) {
res.status(statusCode)
} else {
res.status(HTTPStatus.INTERNAL_SERVER_ERROR)
}
const sendErrorToClient = app.get('env') === 'development'
res.json({
message: err.message,
error: sendErrorToClient ? err : {},
})
})
}
app.setup = async function appSetup() {
await mongodb.client.connect()
logger.info('Connected to MongoDB')
await loadGlobalBlobs()
logger.info('Global blobs loaded')
app.use(helmet())
await setupSwagger()
setupErrorHandling()
}
async function startApp() {
await app.setup()
const port = parseInt(process.env.PORT, 10) || 3100
app.listen(port, err => {
if (err) {
console.error(err)
process.exit(1)
}
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
})
}
// Run this if we're called directly
if (!module.parent) {
startApp().catch(err => {
console.error(err)
process.exit(1)
})
}

View file

@ -0,0 +1,82 @@
const crypto = require('crypto')
const benny = require('benny')
const { Blob } = require('overleaf-editor-core')
const mongoBackend = require('../storage/lib/blob_store/mongo')
const postgresBackend = require('../storage/lib/blob_store/postgres')
const cleanup = require('../test/acceptance/js/storage/support/cleanup')
const MONGO_PROJECT_ID = '637386deb4ce3c62acd3848e'
const POSTGRES_PROJECT_ID = '123'
async function run() {
for (const blobCount of [1, 10, 100, 1000, 10000, 100000, 500000]) {
await cleanup.everything()
const blobs = createBlobs(blobCount)
await insertBlobs(blobs)
const randomHashes = getRandomHashes(blobs, 100)
await benny.suite(
`Read a blob in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
await mongoBackend.findBlob(MONGO_PROJECT_ID, randomHashes[0])
}),
benny.add('Postgres backend', async () => {
await postgresBackend.findBlob(POSTGRES_PROJECT_ID, randomHashes[0])
}),
benny.cycle(),
benny.complete()
)
await benny.suite(
`Read 100 blobs in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
await mongoBackend.findBlobs(MONGO_PROJECT_ID, randomHashes)
}),
benny.add('Postgres backend', async () => {
await postgresBackend.findBlobs(POSTGRES_PROJECT_ID, randomHashes)
}),
benny.cycle(),
benny.complete()
)
await benny.suite(
`Insert a blob in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
const [newBlob] = createBlobs(1)
await mongoBackend.insertBlob(MONGO_PROJECT_ID, newBlob)
}),
benny.add('Postgres backend', async () => {
const [newBlob] = createBlobs(1)
await postgresBackend.insertBlob(POSTGRES_PROJECT_ID, newBlob)
}),
benny.cycle(),
benny.complete()
)
}
}
function createBlobs(blobCount) {
const blobs = []
for (let i = 0; i < blobCount; i++) {
const hash = crypto.randomBytes(20).toString('hex')
blobs.push(new Blob(hash, 42, 42))
}
return blobs
}
async function insertBlobs(blobs) {
for (const blob of blobs) {
await Promise.all([
mongoBackend.insertBlob(MONGO_PROJECT_ID, blob),
postgresBackend.insertBlob(POSTGRES_PROJECT_ID, blob),
])
}
}
function getRandomHashes(blobs, count) {
const hashes = []
for (let i = 0; i < count; i++) {
const index = Math.floor(Math.random() * blobs.length)
hashes.push(blobs[index].getHash())
}
return hashes
}
module.exports = run

View file

@ -0,0 +1,17 @@
const testSetup = require('../test/setup')
const blobStoreSuite = require('./blob_store')
async function main() {
await testSetup.setupPostgresDatabase()
await testSetup.createGcsBuckets()
await blobStoreSuite()
}
main()
.then(() => {
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})

View file

@ -0,0 +1,8 @@
history-v1
--dependencies=postgres,gcs,mongo
--docker-repos=gcr.io/overleaf-ops
--env-add=
--env-pass-through=
--node-version=16.17.1
--public-repo=False
--script-version=4.1.0

View file

@ -0,0 +1,572 @@
{
"AWSTemplateFormatVersion": "2010-09-09",
"Metadata": {
"AWS::CloudFormation::Designer": {
"ee78c12d-0d1e-4ca0-8fa9-ba02f49d071c": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 30,
"y": 60
},
"z": 0,
"embeds": []
},
"a52902b8-f027-45a8-9151-3e56ced5fb42": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 30,
"y": 140
},
"z": 0,
"embeds": []
},
"674a64fc-3703-4222-91b9-4878490489e2": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 250,
"y": 100
},
"z": 0,
"embeds": [],
"isassociatedwith": [
"5c314e8e-535b-4b09-8bb7-c089794a3829"
]
},
"5c314e8e-535b-4b09-8bb7-c089794a3829": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 250,
"y": 210
},
"z": 0,
"embeds": []
},
"3da9a376-afc1-4b37-add1-9cf0df20b0a0": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 360,
"y": 100
},
"z": 0,
"embeds": []
},
"7fd11cc7-5574-44f3-99df-877b6f0f2a74": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 130,
"y": 60
},
"z": 0,
"embeds": [],
"isassociatedwith": [
"ee78c12d-0d1e-4ca0-8fa9-ba02f49d071c"
]
},
"1d8a8e19-2661-44d4-99c0-4a2c88c8557d": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 130,
"y": 140
},
"z": 0,
"embeds": [],
"isassociatedwith": [
"a52902b8-f027-45a8-9151-3e56ced5fb42"
]
},
"e29c9a81-85ad-4511-ab1e-018fe50f1573": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 30,
"y": 220
},
"z": 0,
"embeds": []
},
"1388662c-85e1-4f6e-9b80-0f1888a6e07d": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 130,
"y": 220
},
"z": 0,
"embeds": [],
"isassociatedwith": [
"e29c9a81-85ad-4511-ab1e-018fe50f1573"
]
},
"236600ec-46ca-4770-8d7c-61532a6d8c27": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 30,
"y": 300
},
"z": 0,
"embeds": []
},
"454a6298-2f35-48d7-8cd5-3152d78a585b": {
"size": {
"width": 60,
"height": 60
},
"position": {
"x": 130,
"y": 300
},
"z": 0,
"embeds": []
}
}
},
"Resources": {
"Blobs": {
"Type": "AWS::S3::Bucket",
"Properties": {
"BucketName": {
"Fn::Join": [
"-",
[
{
"Ref": "OverleafEditorBucketPrefix"
},
"blobs"
]
]
},
"VersioningConfiguration": {
"Status": "Enabled"
},
"LifecycleConfiguration": {
"Rules": [
{
"NoncurrentVersionExpirationInDays": 90,
"Status": "Enabled"
}
]
},
"BucketEncryption": {
"ServerSideEncryptionConfiguration": [
{
"ServerSideEncryptionByDefault": {
"SSEAlgorithm": "AES256"
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "ee78c12d-0d1e-4ca0-8fa9-ba02f49d071c"
}
}
},
"Chunks": {
"Type": "AWS::S3::Bucket",
"Properties": {
"BucketName": {
"Fn::Join": [
"-",
[
{
"Ref": "OverleafEditorBucketPrefix"
},
"chunks"
]
]
},
"VersioningConfiguration": {
"Status": "Enabled"
},
"LifecycleConfiguration": {
"Rules": [
{
"NoncurrentVersionExpirationInDays": 80,
"Status": "Enabled"
}
]
},
"BucketEncryption": {
"ServerSideEncryptionConfiguration": [
{
"ServerSideEncryptionByDefault": {
"SSEAlgorithm": "AES256"
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "a52902b8-f027-45a8-9151-3e56ced5fb42"
}
}
},
"APIUser": {
"Type": "AWS::IAM::User",
"Properties": {
"Groups": [
{
"Ref": "APIGroup"
}
]
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "674a64fc-3703-4222-91b9-4878490489e2"
}
}
},
"APIGroup": {
"Type": "AWS::IAM::Group",
"Properties": {},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "5c314e8e-535b-4b09-8bb7-c089794a3829"
}
}
},
"APIUserAccessKey": {
"Type": "AWS::IAM::AccessKey",
"Properties": {
"UserName": {
"Ref": "APIUser"
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "3da9a376-afc1-4b37-add1-9cf0df20b0a0"
}
}
},
"BlobsPolicy": {
"Type": "AWS::S3::BucketPolicy",
"Properties": {
"Bucket": {
"Ref": "Blobs"
},
"PolicyDocument": {
"Id": "BlobsPolicy",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "BlobsPolicyAPIUser",
"Action": [
"s3:GetObject",
"s3:PutObject"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "Blobs"
},
"/*"
]
]
},
"Principal": {
"AWS": {
"Fn::GetAtt": [
"APIUser",
"Arn"
]
}
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "7fd11cc7-5574-44f3-99df-877b6f0f2a74"
}
}
},
"ChunksPolicy": {
"Type": "AWS::S3::BucketPolicy",
"Properties": {
"Bucket": {
"Ref": "Chunks"
},
"PolicyDocument": {
"Id": "ChunksPolicy",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "ChunksPolicyAPIUser",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "Chunks"
},
"/*"
]
]
},
"Principal": {
"AWS": {
"Fn::GetAtt": [
"APIUser",
"Arn"
]
}
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "1d8a8e19-2661-44d4-99c0-4a2c88c8557d"
}
}
},
"Zips": {
"Type": "AWS::S3::Bucket",
"Properties": {
"BucketName": {
"Fn::Join": [
"-",
[
{
"Ref": "OverleafEditorBucketPrefix"
},
"zips"
]
]
},
"BucketEncryption": {
"ServerSideEncryptionConfiguration": [
{
"ServerSideEncryptionByDefault": {
"SSEAlgorithm": "AES256"
}
}
]
},
"LifecycleConfiguration": {
"Rules": [
{
"ExpirationInDays": 1,
"Status": "Enabled"
}
]
},
"PublicAccessBlockConfiguration": {
"BlockPublicAcls": true,
"BlockPublicPolicy": true,
"IgnorePublicAcls": true,
"RestrictPublicBuckets": true
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "e29c9a81-85ad-4511-ab1e-018fe50f1573"
}
}
},
"ZipsPolicy": {
"Type": "AWS::S3::BucketPolicy",
"Properties": {
"Bucket": {
"Ref": "Zips"
},
"PolicyDocument": {
"Id": "ZipsPolicy",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "ZipsPolicyAPIUser",
"Action": [
"s3:GetObject",
"s3:PutObject"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "Zips"
},
"/*"
]
]
},
"Principal": {
"AWS": {
"Fn::GetAtt": [
"APIUser",
"Arn"
]
}
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "1388662c-85e1-4f6e-9b80-0f1888a6e07d"
}
}
},
"Analytics": {
"Type": "AWS::S3::Bucket",
"Properties": {
"BucketName": {
"Fn::Join": [
"-",
[
{
"Ref": "OverleafEditorBucketPrefix"
},
"analytics"
]
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "236600ec-46ca-4770-8d7c-61532a6d8c27"
}
}
},
"AnalyticsPolicy": {
"Type": "AWS::S3::BucketPolicy",
"Properties": {
"Bucket": {
"Ref": "Analytics"
},
"PolicyDocument": {
"Id": "AnalyticsPolicy",
"Version": "2012-10-17",
"Statement": [
{
"Sid": "AnalyticsPolicyAPIUser",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "Analytics"
},
"/*"
]
]
},
"Principal": {
"AWS": {
"Fn::GetAtt": [
"APIUser",
"Arn"
]
}
}
},
{
"Sid": "AnalyticsPolicyAPIUserBucketPerms",
"Action": "s3:ListBucket",
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "Analytics"
}
]
]
},
"Principal": {
"AWS": {
"Fn::GetAtt": [
"APIUser",
"Arn"
]
}
}
}
]
}
},
"Metadata": {
"AWS::CloudFormation::Designer": {
"id": "454a6298-2f35-48d7-8cd5-3152d78a585b"
}
}
}
},
"Parameters": {
"OverleafEditorBucketPrefix": {
"Description": "Prefix for the S3 bucket names (e.g. production-overleaf-editor or staging-overleaf-editor)",
"Type": "String"
}
},
"Outputs": {
"APIUserAccessKey": {
"Value": {
"Ref": "APIUserAccessKey"
}
},
"APIUserSecretKey": {
"Value": {
"Fn::GetAtt": [
"APIUserAccessKey",
"SecretAccessKey"
]
}
}
}
}

View file

@ -0,0 +1,60 @@
{
"databaseUrl": "HISTORY_CONNECTION_STRING",
"herokuDatabaseUrl": "DATABASE_URL",
"databasePoolMin": "DATABASE_POOL_MIN",
"databasePoolMax": "DATABASE_POOL_MAX",
"persistor": {
"backend": "PERSISTOR_BACKEND",
"s3": {
"key": "AWS_ACCESS_KEY_ID",
"secret": "AWS_SECRET_ACCESS_KEY",
"maxRetries": "S3_MAX_RETRIES",
"httpOptions": {
"timeout": "S3_TIMEOUT"
}
},
"gcs": {
"deletedBucketSuffix": "GCS_DELETED_BUCKET_SUFFIX",
"unlockBeforeDelete": "GCS_UNLOCK_BEFORE_DELETE",
"endpoint": {
"apiEndpoint": "GCS_API_ENDPOINT",
"apiScheme": "GCS_API_SCHEME",
"projectId": "GCS_PROJECT_ID"
}
},
"fallback": {
"backend": "PERSISTOR_FALLBACK_BACKEND",
"buckets": "PERSISTOR_BUCKET_MAPPING"
}
},
"blobStore": {
"globalBucket": "OVERLEAF_EDITOR_BLOBS_BUCKET",
"projectBucket": "OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET"
},
"chunkStore": {
"historyStoreConcurrency": "HISTORY_STORE_CONCURRENCY",
"bucket": "OVERLEAF_EDITOR_CHUNKS_BUCKET"
},
"zipStore": {
"bucket": "OVERLEAF_EDITOR_ZIPS_BUCKET",
"zipTimeoutMs": "ZIP_STORE_ZIP_TIMEOUT_MS"
},
"analytics": {
"bucket": "OVERLEAF_EDITOR_ANALYTICS_BUCKET"
},
"mongo": {
"uri": "MONGO_CONNECTION_STRING"
},
"basicHttpAuth": {
"password": "STAGING_PASSWORD",
"oldPassword": "BASIC_HTTP_AUTH_OLD_PASSWORD"
},
"jwtAuth": {
"key": "OT_JWT_AUTH_KEY",
"oldKey": "OT_JWT_AUTH_OLD_KEY",
"algorithm": "OT_JWT_AUTH_ALG"
},
"clusterWorkers": "CLUSTER_WORKERS",
"maxFileUploadSize": "MAX_FILE_UPLOAD_SIZE",
"httpsOnly": "HTTPS_ONLY"
}

View file

@ -0,0 +1,29 @@
{
"persistor": {
"backend": "s3",
"s3": {
"signedUrlExpiryInMs": "1800000",
"maxRetries": "1",
"httpOptions": {
"timeout": "8000"
}
},
"gcs": {
"signedUrlExpiryInMs": "1800000",
"deleteConcurrency": "50"
}
},
"chunkStore": {
"historyStoreConcurrency": "4"
},
"zipStore": {
"zipTimeoutMs": "360000"
},
"maxDeleteKeys": "1000",
"useDeleteObjects": "true",
"clusterWorkers": "1",
"maxFileUploadSize": "52428800",
"databasePoolMin": "2",
"databasePoolMax": "10",
"httpsOnly": "false"
}

View file

@ -0,0 +1,41 @@
{
"databaseUrl": "postgres://postgres:postgres@postgres/write_latex_dev",
"persistor": {
"s3": {
"endpoint": "http://s3:8080",
"pathStyle": "true"
},
"gcs": {
"unsignedUrls": "true",
"endpoint": {
"apiEndpoint": "fake-gcs:9090",
"apiScheme": "http",
"projectId": "fake"
}
}
},
"blobStore": {
"globalBucket": "overleaf-development-blobs",
"projectBucket": "overleaf-development-project-blobs"
},
"chunkStore": {
"bucket": "overleaf-development-chunks"
},
"zipStore": {
"bucket": "overleaf-development-zips"
},
"analytics": {
"bucket": "overleaf-development-analytics"
},
"useDeleteObjects": "false",
"mongo": {
"uri": "mongodb://mongo:27017/sharelatex"
},
"basicHttpAuth": {
"password": "password"
},
"jwtAuth": {
"key": "secureKey",
"algorithm": "HS256"
}
}

View file

@ -0,0 +1 @@
{ }

View file

@ -0,0 +1,40 @@
{
"databaseUrl": "postgres://sharelatex:sharelatex@postgres/sharelatex",
"persistor": {
"backend": "gcs",
"gcs": {
"unsignedUrls": "true",
"endpoint": {
"apiEndpoint": "gcs:9090",
"apiScheme": "http",
"projectId": "fake"
}
}
},
"blobStore": {
"globalBucket": "overleaf-test-blobs",
"projectBucket": "overleaf-test-project-blobs"
},
"chunkStore": {
"bucket": "overleaf-test-chunks"
},
"zipStore": {
"bucket": "overleaf-test-zips"
},
"analytics": {
"bucket": "overleaf-test-analytics"
},
"maxDeleteKeys": "3",
"useDeleteObjects": "false",
"mongo": {
"uri": "mongodb://mongo:27017/sharelatex"
},
"basicHttpAuth": {
"password": "test"
},
"jwtAuth": {
"key": "testtest",
"algorithm": "HS256"
},
"maxFileUploadSize": "524288"
}

View file

@ -0,0 +1,74 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
user: node
command: npm run test:unit:_run
environment:
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
test_acceptance:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
GCS_API_ENDPOINT: gcs:9090
GCS_API_SCHEME: http
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
depends_on:
mongo:
condition: service_healthy
postgres:
condition: service_healthy
gcs:
condition: service_healthy
user: node
command: npm run test:acceptance:_run
tar:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
volumes:
- ./:/tmp/build/
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
mongo:
image: mongo:4.4.16
healthcheck:
test: "mongo --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
interval: 1s
retries: 20
postgres:
image: postgres:10
environment:
POSTGRES_USER: sharelatex
POSTGRES_PASSWORD: sharelatex
healthcheck:
test: pg_isready --quiet
interval: 1s
retries: 20
gcs:
image: fsouza/fake-gcs-server:v1.21.2
command: ["--port=9090", "--scheme=http"]
healthcheck:
test: wget --quiet --output-document=/dev/null http://localhost:9090/storage/v1/b
interval: 1s
retries: 20

View file

@ -0,0 +1,77 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
image: node:16.17.1
volumes:
- .:/overleaf/services/history-v1
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
working_dir: /overleaf/services/history-v1
environment:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
image: node:16.17.1
volumes:
- .:/overleaf/services/history-v1
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
working_dir: /overleaf/services/history-v1
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
GCS_API_ENDPOINT: gcs:9090
GCS_API_SCHEME: http
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ERROR
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
condition: service_healthy
postgres:
condition: service_healthy
gcs:
condition: service_healthy
command: npm run --silent test:acceptance
mongo:
image: mongo:4.4.16
healthcheck:
test: "mongo --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
interval: 1s
retries: 20
postgres:
image: postgres:10
environment:
POSTGRES_USER: sharelatex
POSTGRES_PASSWORD: sharelatex
healthcheck:
test: pg_isready --host=localhost --quiet
interval: 1s
retries: 20
gcs:
image: fsouza/fake-gcs-server:v1.21.2
command: ["--port=9090", "--scheme=http"]
healthcheck:
test: wget --quiet --output-document=/dev/null http://localhost:9090/storage/v1/b
interval: 1s
retries: 20

View file

@ -0,0 +1,19 @@
const config = require('config')
const baseConfig = {
client: 'postgresql',
connection: config.herokuDatabaseUrl || config.databaseUrl,
pool: {
min: parseInt(config.databasePoolMin, 10),
max: parseInt(config.databasePoolMax, 10),
},
migrations: {
tableName: 'knex_migrations',
},
}
module.exports = {
development: baseConfig,
production: baseConfig,
test: baseConfig,
}

View file

@ -0,0 +1,80 @@
/**
* This is the initial migration, meant to replicate the current state of the
* history database. If tables already exist, this migration is a noop.
*/
exports.up = async function (knex) {
await knex.raw(`
CREATE TABLE IF NOT EXISTS chunks (
id SERIAL,
doc_id integer NOT NULL,
end_version integer NOT NULL,
end_timestamp timestamp without time zone,
CONSTRAINT chunks_version_non_negative CHECK (end_version >= 0)
)
`)
await knex.raw(`
CREATE UNIQUE INDEX IF NOT EXISTS index_chunks_on_doc_id_and_end_version
ON chunks (doc_id, end_version)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS old_chunks (
chunk_id integer NOT NULL PRIMARY KEY,
doc_id integer NOT NULL,
end_version integer,
end_timestamp timestamp without time zone,
deleted_at timestamp without time zone
)
`)
await knex.raw(`
CREATE INDEX IF NOT EXISTS index_old_chunks_on_doc_id_and_end_version
ON old_chunks (doc_id, end_version)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS pending_chunks (
id SERIAL,
doc_id integer NOT NULL,
end_version integer NOT NULL,
end_timestamp timestamp without time zone,
CONSTRAINT chunks_version_non_negative CHECK (end_version >= 0)
)
`)
await knex.raw(`
CREATE INDEX IF NOT EXISTS index_pending_chunks_on_doc_id_and_id
ON pending_chunks (doc_id, id)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS blobs (
hash_bytes bytea NOT NULL PRIMARY KEY,
byte_length integer NOT NULL,
string_length integer,
global boolean,
CONSTRAINT blobs_byte_length_non_negative CHECK (byte_length >= 0),
CONSTRAINT blobs_string_length_non_negative
CHECK (string_length IS NULL OR string_length >= 0)
)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS project_blobs (
project_id integer NOT NULL,
hash_bytes bytea NOT NULL,
byte_length integer NOT NULL,
string_length integer,
PRIMARY KEY (project_id, hash_bytes),
CONSTRAINT project_blobs_byte_length_non_negative
CHECK (byte_length >= 0),
CONSTRAINT project_blobs_string_length_non_negative
CHECK (string_length IS NULL OR string_length >= 0)
)
`)
await knex.raw(`CREATE SEQUENCE IF NOT EXISTS docs_id_seq`)
}
exports.down = async function (knex) {
// Don't do anything on the down migration
}

View file

@ -0,0 +1,23 @@
exports.up = async function (knex) {
await knex.raw(`
ALTER TABLE chunks ADD COLUMN start_version integer
`)
await knex.raw(`
ALTER TABLE pending_chunks ADD COLUMN start_version integer
`)
await knex.raw(`
ALTER TABLE old_chunks ADD COLUMN start_version integer
`)
}
exports.down = async function (knex) {
await knex.raw(`
ALTER TABLE chunks DROP COLUMN start_version
`)
await knex.raw(`
ALTER TABLE pending_chunks DROP COLUMN start_version
`)
await knex.raw(`
ALTER TABLE old_chunks DROP COLUMN start_version
`)
}

View file

@ -0,0 +1,41 @@
exports.config = {
// CREATE INDEX CONCURRENTLY can't be run inside a transaction
// If this migration fails in the middle, indexes and constraints will have
// to be cleaned up manually.
transaction: false,
}
exports.up = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
ADD CONSTRAINT chunks_start_version_non_negative
CHECK (start_version IS NOT NULL AND start_version >= 0)
NOT VALID
`)
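// Adding the constraint as NOT VALID avoids scanning existing rows while
// holding a strong lock; the separate VALIDATE step below checks old rows
// with a weaker lock.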
await knex.raw(`
ALTER TABLE chunks
VALIDATE CONSTRAINT chunks_start_version_non_negative
`)
await knex.raw(`
CREATE UNIQUE INDEX CONCURRENTLY index_chunks_on_doc_id_and_start_version
ON chunks (doc_id, start_version)
`)
await knex.raw(`
ALTER TABLE chunks
ADD UNIQUE USING INDEX index_chunks_on_doc_id_and_start_version
`)
}
exports.down = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
DROP CONSTRAINT IF EXISTS index_chunks_on_doc_id_and_start_version
`)
await knex.raw(`
DROP INDEX IF EXISTS index_chunks_on_doc_id_and_start_version
`)
await knex.raw(`
ALTER TABLE chunks
DROP CONSTRAINT IF EXISTS chunks_start_version_non_negative
`)
}

View file

@ -0,0 +1,7 @@
exports.up = async function (knex) {
await knex.raw(`DROP TABLE IF EXISTS blobs`)
}
exports.down = function (knex) {
// Not reversible
}

View file

@ -0,0 +1,20 @@
{
"ignore": [
".git",
"node_modules/"
],
"verbose": true,
"legacyWatch": true,
"execMap": {
"js": "npm run start"
},
"watch": [
"api/",
"app/js/",
"app.js",
"config/",
"storage/",
"../../libraries/"
],
"ext": "js"
}

View file

@ -0,0 +1,68 @@
{
"name": "overleaf-editor",
"version": "1.0.0",
"description": "Overleaf Editor.",
"author": "",
"license": "Proprietary",
"private": true,
"dependencies": {
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/object-persistor": "*",
"archiver": "^5.3.0",
"basic-auth": "^2.0.1",
"bluebird": "^3.7.2",
"body-parser": "^1.19.0",
"bunyan": "^1.8.12",
"check-types": "^11.1.2",
"command-line-args": "^3.0.3",
"config": "^1.19.0",
"cookie-parser": "~1.4.5",
"cors": "^2.8.5",
"express": "^4.17.1",
"fs-extra": "^9.0.1",
"generic-pool": "^2.1.1",
"helmet": "^3.22.0",
"http-status": "^1.4.2",
"jsonwebtoken": "^8.5.1",
"knex": "^2.4.0",
"lodash": "^4.17.19",
"mongodb": "^4.11.0",
"overleaf-editor-core": "*",
"pg": "^8.7.1",
"string-to-stream": "^1.0.1",
"swagger-tools": "^0.10.4",
"temp": "^0.8.3",
"throng": "^4.0.0",
"tsscmp": "^1.0.6",
"utf-8-validate": "^5.0.4"
},
"devDependencies": {
"benny": "^3.7.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"istanbul": "^0.4.5",
"mocha": "^8.4.0",
"node-fetch": "^2.6.7",
"sinon": "^9.0.2",
"swagger-client": "^3.10.0",
"yauzl": "^2.9.1"
},
"scripts": {
"start": "node $NODE_APP_OPTIONS app.js",
"lint": "eslint --max-warnings 0 --format unix .",
"lint:fix": "eslint --fix .",
"format": "prettier --list-different $PWD/'**/*.js'",
"format:fix": "prettier --write $PWD/'**/*.js'",
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"test:acceptance": "npm run test:acceptance:_run -- --grep=$MOCHA_GREP",
"test:unit:_run": "mocha --recursive --reporter spec $@ test/unit/js",
"test:acceptance:_run": "mocha --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
"nodemon": "nodemon --config nodemon.json",
"migrate": "knex migrate:latest",
"delete_old_chunks": "node storage/tasks/delete_old_chunks.js",
"fix_duplicate_versions": "node storage/tasks/fix_duplicate_versions.js",
"benchmarks": "node benchmarks/index.js"
}
}

View file

@ -0,0 +1,17 @@
exports.BatchBlobStore = require('./lib/batch_blob_store')
exports.blobHash = require('./lib/blob_hash')
exports.HashCheckBlobStore = require('./lib/hash_check_blob_store')
exports.chunkStore = require('./lib/chunk_store')
exports.historyStore = require('./lib/history_store')
exports.knex = require('./lib/knex')
exports.mongodb = require('./lib/mongodb')
exports.persistChanges = require('./lib/persist_changes')
exports.persistor = require('./lib/persistor')
exports.ProjectArchive = require('./lib/project_archive')
exports.streams = require('./lib/streams')
exports.temp = require('./lib/temp')
exports.zipStore = require('./lib/zip_store')
const { BlobStore, loadGlobalBlobs } = require('./lib/blob_store')
exports.BlobStore = BlobStore
exports.loadGlobalBlobs = loadGlobalBlobs

View file

@ -0,0 +1,52 @@
'use strict'
const check = require('check-types')
const { Blob } = require('overleaf-editor-core')
const assert = check.assert
const MONGO_ID_REGEXP = /^[0-9a-f]{24}$/
const POSTGRES_ID_REGEXP = /^[1-9][0-9]{0,9}$/
const PROJECT_ID_REGEXP = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/
function transaction(transaction, message) {
assert.function(transaction, message)
}
function blobHash(arg, message) {
assert.match(arg, Blob.HEX_HASH_RX, message)
}
/**
* A project id is a string that contains either an integer (for projects stored
* in Postgres) or 24 hex digits (for projects stored in Mongo)
*/
function projectId(arg, message) {
assert.match(arg, PROJECT_ID_REGEXP, message)
}
/**
* A chunk id is either a number (for projects stored in Postgres) or a 24
* character string (for projects stored in Mongo)
*/
function chunkId(arg, message) {
const valid = check.integer(arg) || check.match(arg, MONGO_ID_REGEXP)
if (!valid) {
throw new TypeError(message)
}
}
function mongoId(arg, message) {
assert.match(arg, MONGO_ID_REGEXP, message)
}
module.exports = {
...assert,
transaction,
blobHash,
projectId,
chunkId,
mongoId,
MONGO_ID_REGEXP,
POSTGRES_ID_REGEXP,
}
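// Illustrative examples (not part of the original file):
//   projectId('123') passes (Postgres-style numeric id)
//   projectId('637386deb4ce3c62acd3848e') passes (Mongo-style 24-hex id)
//   chunkId(42) and chunkId('637386deb4ce3c62acd3848e') both pass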

View file

@ -0,0 +1,40 @@
'use strict'
const BPromise = require('bluebird')
/**
* @constructor
* @param {BlobStore} blobStore
* @classdesc
* Wrapper for BlobStore that pre-fetches blob metadata to avoid making one
* database call per blob lookup.
*/
function BatchBlobStore(blobStore) {
this.blobStore = blobStore
this.blobs = new Map()
}
/**
* Pre-fetch metadata for the given blob hashes.
*
* @param {Array.<string>} hashes
* @return {Promise}
*/
BatchBlobStore.prototype.preload = function batchBlobStorePreload(hashes) {
return BPromise.each(this.blobStore.getBlobs(hashes), blob => {
this.blobs.set(blob.getHash(), blob)
})
}
/**
* @see BlobStore#getBlob
*/
BatchBlobStore.prototype.getBlob = BPromise.method(
function batchBlobStoreGetBlob(hash) {
const blob = this.blobs.get(hash)
if (blob) return blob
return this.blobStore.getBlob(hash)
}
)
module.exports = BatchBlobStore
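// Usage sketch (illustrative only), mirroring the change import flow: preload
// metadata for all hashes up front so later getBlob calls hit the in-memory
// map instead of the database.
//
//   const batchBlobStore = new BatchBlobStore(blobStore)
//   await batchBlobStore.preload(Array.from(blobHashes))
//   const blob = await batchBlobStore.getBlob(someHash)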

View file

@ -0,0 +1,78 @@
/** @module */
'use strict'
const BPromise = require('bluebird')
const fs = BPromise.promisifyAll(require('fs'))
const crypto = require('crypto')
const assert = require('./assert')
function getGitBlobHeader(byteLength) {
return 'blob ' + byteLength + '\x00'
}
function getBlobHash(byteLength) {
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update(getGitBlobHeader(byteLength))
return hash
}
/**
* Compute the git blob hash for a blob from a readable stream of its content.
*
* @function
* @param {number} byteLength
* @param {stream.Readable} stream
* @return {Promise.<string>} hexadecimal SHA-1 hash
*/
exports.fromStream = BPromise.method(function blobHashFromStream(
byteLength,
stream
) {
assert.integer(byteLength, 'blobHash: bad byteLength')
assert.object(stream, 'blobHash: bad stream')
const hash = getBlobHash(byteLength)
return new BPromise(function (resolve, reject) {
stream.on('end', function () {
hash.end()
resolve(hash.read())
})
stream.on('error', reject)
stream.pipe(hash)
})
})
/**
* Compute the git blob hash for a blob with the given string content.
*
* @param {string} string
* @return {string} hexadecimal SHA-1 hash
*/
exports.fromString = function blobHashFromString(string) {
assert.string(string, 'blobHash: bad string')
const hash = getBlobHash(Buffer.byteLength(string))
hash.update(string, 'utf8')
hash.end()
return hash.read()
}
/**
* Compute the git blob hash for the content of a file
*
* @param {string} pathname
* @return {Promise.<string>} hexadecimal SHA-1 hash
*/
exports.fromFile = function blobHashFromFile(pathname) {
assert.string(pathname, 'blobHash: bad pathname')
function getByteLengthOfFile() {
return fs.statAsync(pathname).then(stat => stat.size)
}
const fromStream = this.fromStream
return getByteLengthOfFile().then(function (byteLength) {
const stream = fs.createReadStream(pathname)
return fromStream(byteLength, stream)
})
}
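Because the header is the standard git one ('blob <byteLength>\0'), the hashes computed here should agree with `git hash-object`. A minimal sketch (not part of the commit; the require path is assumed):
const blobHash = require('./blob_hash')

console.log(blobHash.fromString(''))
// e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 -- git's well-known empty-blob hash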

View file

@ -0,0 +1,290 @@
'use strict'
const config = require('config')
const fs = require('fs')
const isValidUtf8 = require('utf-8-validate')
const stringToStream = require('string-to-stream')
const core = require('overleaf-editor-core')
const objectPersistor = require('@overleaf/object-persistor')
const OError = require('@overleaf/o-error')
const Blob = core.Blob
const TextOperation = core.TextOperation
const containsNonBmpChars = core.util.containsNonBmpChars
const assert = require('../assert')
const blobHash = require('../blob_hash')
const mongodb = require('../mongodb')
const persistor = require('../persistor')
const projectKey = require('../project_key')
const streams = require('../streams')
const postgresBackend = require('./postgres')
const mongoBackend = require('./mongo')
const GLOBAL_BLOBS = new Map()
function makeGlobalKey(hash) {
return `${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4)}`
}
function makeProjectKey(projectId, hash) {
return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}`
}
async function uploadBlob(projectId, blob, stream) {
const bucket = config.get('blobStore.projectBucket')
const key = makeProjectKey(projectId, blob.getHash())
await persistor.sendStream(bucket, key, stream, {
contentType: 'application/octet-stream',
})
}
function getBlobLocation(projectId, hash) {
if (GLOBAL_BLOBS.has(hash)) {
return {
bucket: config.get('blobStore.globalBucket'),
key: makeGlobalKey(hash),
}
} else {
return {
bucket: config.get('blobStore.projectBucket'),
key: makeProjectKey(projectId, hash),
}
}
}
/**
* Returns the appropriate backend for the given project id
*
* Numeric ids use the Postgres backend.
* Strings of 24 hex characters use the Mongo backend.
*/
function getBackend(projectId) {
if (assert.POSTGRES_ID_REGEXP.test(projectId)) {
return postgresBackend
} else if (assert.MONGO_ID_REGEXP.test(projectId)) {
return mongoBackend
} else {
throw new OError('bad project id', { projectId })
}
}
async function makeBlobForFile(pathname) {
async function getByteLengthOfFile() {
const stat = await fs.promises.stat(pathname)
return stat.size
}
async function getHashOfFile(blob) {
const stream = fs.createReadStream(pathname)
const hash = await blobHash.fromStream(blob.getByteLength(), stream)
return hash
}
const blob = new Blob()
const byteLength = await getByteLengthOfFile()
blob.setByteLength(byteLength)
const hash = await getHashOfFile(blob)
blob.setHash(hash)
return blob
}
async function getStringLengthOfFile(byteLength, pathname) {
// We have to read the file into memory to get its UTF-8 length, so don't
// bother for files that are too large for us to edit anyway.
if (byteLength > Blob.MAX_EDITABLE_BYTE_LENGTH_BOUND) {
return null
}
// We also need to check whether the file contains non-BMP or null characters
let data = await fs.promises.readFile(pathname)
if (!isValidUtf8(data)) return null
data = data.toString()
if (data.length > TextOperation.MAX_STRING_LENGTH) return null
if (containsNonBmpChars(data)) return null
if (data.indexOf('\x00') !== -1) return null
return data.length
}
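// Illustrative note (not part of the commit): getStringLengthOfFile returns a
// UTF-8 character count only when the file could be edited as text:
//   byteLength > Blob.MAX_EDITABLE_BYTE_LENGTH_BOUND -> null (too large to load)
//   invalid UTF-8, a non-BMP character, or a '\x00'  -> null (treated as binary)
//   string longer than TextOperation.MAX_STRING_LENGTH -> null
//   otherwise                                        -> data.length
// A null string length therefore marks the blob as binary-only.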
async function deleteBlobsInBucket(projectId) {
const bucket = config.get('blobStore.projectBucket')
const prefix = `${projectKey.format(projectId)}/`
await persistor.deleteDirectory(bucket, prefix)
}
async function loadGlobalBlobs() {
const blobs = await mongodb.globalBlobs.find()
for await (const blob of blobs) {
GLOBAL_BLOBS.set(blob._id, {
blob: new Blob(blob._id, blob.byteLength, blob.stringLength),
demoted: Boolean(blob.demoted),
})
}
}
/**
* @classdesc
* Fetch and store the content of files using content-addressable hashing. The
* blob store manages both content and metadata (byte and UTF-8 length) for
* blobs.
*/
class BlobStore {
/**
* @constructor
* @param {string} projectId the project for which we'd like to find blobs
*/
constructor(projectId) {
assert.projectId(projectId)
this.projectId = projectId
this.backend = getBackend(this.projectId)
}
/**
* Set up the initial data structure for a given project
*/
async initialize() {
await this.backend.initialize(this.projectId)
}
/**
* Write a blob, if one does not already exist, with the given UTF-8 encoded
* string content.
*
* @param {string} string
* @return {Promise.<Blob>}
*/
async putString(string) {
assert.string(string, 'bad string')
const hash = blobHash.fromString(string)
const existingBlob = await this._findBlobBeforeInsert(hash)
if (existingBlob != null) {
return existingBlob
}
const newBlob = new Blob(hash, Buffer.byteLength(string), string.length)
// Note: the stringToStream is to work around a bug in the AWS SDK: it won't
// allow Body to be blank.
await uploadBlob(this.projectId, newBlob, stringToStream(string))
await this.backend.insertBlob(this.projectId, newBlob)
return newBlob
}
/**
* Write a blob, if one does not already exist, with the given file (usually a
* temporary file).
*
* @param {string} pathname
* @return {Promise.<Blob>}
*/
async putFile(pathname) {
assert.string(pathname, 'bad pathname')
const newBlob = await makeBlobForFile(pathname)
const existingBlob = await this._findBlobBeforeInsert(newBlob.getHash())
if (existingBlob != null) {
return existingBlob
}
const stringLength = await getStringLengthOfFile(
newBlob.getByteLength(),
pathname
)
newBlob.setStringLength(stringLength)
await uploadBlob(this.projectId, newBlob, fs.createReadStream(pathname))
await this.backend.insertBlob(this.projectId, newBlob)
return newBlob
}
/**
* Fetch a blob's content by its hash as a UTF-8 encoded string.
*
* @param {string} hash hexadecimal SHA-1 hash
* @return {Promise.<string>} promise for the content of the file
*/
async getString(hash) {
assert.blobHash(hash, 'bad hash')
const stream = await this.getStream(hash)
const buffer = await streams.readStreamToBuffer(stream)
return buffer.toString()
}
/**
* Fetch a blob by its hash as a stream.
*
* Note that, according to the AWS SDK docs, this does not retry after initial
* failure, so the caller must be prepared to retry on errors, if appropriate.
*
* @param {string} hash hexadecimal SHA-1 hash
* @return {stream} a stream to read the file
*/
async getStream(hash) {
assert.blobHash(hash, 'bad hash')
const { bucket, key } = getBlobLocation(this.projectId, hash)
try {
const stream = await persistor.getObjectStream(bucket, key)
return stream
} catch (err) {
if (err instanceof objectPersistor.Errors.NotFoundError) {
throw new Blob.NotFoundError(hash)
}
throw err
}
}
/**
* Read a blob metadata record by hexadecimal hash.
*
* @param {string} hash hexadecimal SHA-1 hash
* @return {Promise.<Blob?>}
*/
async getBlob(hash) {
assert.blobHash(hash, 'bad hash')
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null) {
return globalBlob.blob
}
const blob = await this.backend.findBlob(this.projectId, hash)
return blob
}
/**
* Read multiple blob metadata records by hexadecimal hashes. Global blobs are
* served from the in-memory map; the rest are fetched from the backend.
*
* @param {Array.<string>} hashes hexadecimal SHA-1 hashes
* @return {Promise.<Array.<Blob>>} promise for the found blobs
*/
async getBlobs(hashes) {
assert.array(hashes, 'bad hashes')
const nonGlobalHashes = []
const blobs = []
for (const hash of hashes) {
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null) {
blobs.push(globalBlob.blob)
} else {
nonGlobalHashes.push(hash)
}
}
const projectBlobs = await this.backend.findBlobs(
this.projectId,
nonGlobalHashes
)
blobs.push(...projectBlobs)
return blobs
}
/**
* Delete all blobs that belong to the project.
*/
async deleteBlobs() {
await Promise.all([
this.backend.deleteBlobs(this.projectId),
deleteBlobsInBucket(this.projectId),
])
}
async _findBlobBeforeInsert(hash) {
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null && !globalBlob.demoted) {
return globalBlob.blob
}
const blob = await this.backend.findBlob(this.projectId, hash)
return blob
}
}
module.exports = { BlobStore, loadGlobalBlobs }
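A hypothetical end-to-end sketch of the class above (not part of the commit): the project id is invented, and it is assumed that config, Mongo and the object store are already set up.
const { BlobStore, loadGlobalBlobs } = require('./lib/blob_store')

async function example() {
  await loadGlobalBlobs() // fill the in-memory global blob map once at startup
  const blobStore = new BlobStore('5f0c9e1b2a3d4c5e6f708192') // Mongo-style project id
  await blobStore.initialize()
  const blob = await blobStore.putString('hello blob store')
  const roundTripped = await blobStore.getString(blob.getHash())
  console.log(blob.getHash(), roundTripped === 'hello blob store') // <hash> true
}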

View file

@ -0,0 +1,289 @@
/**
* Mongo backend for the blob store.
*
* Blobs are stored in the projectHistoryBlobs collection. Each project has a
* document in that collection. That document has a "blobs" subdocument whose
* fields are buckets of blobs. The key of a bucket is the first three hex
* digits of the blob hash. The value of the bucket is an array of blobs that
* match the key.
*
* Buckets have a maximum capacity of 8 blobs. When that capacity is exceeded,
* blobs are stored in a secondary collection: the projectHistoryShardedBlobs
collection. This collection shards blobs across 16 documents per project.
* The shard key is the first hex digit of the hash. The documents are also
* organized in buckets, but the bucket key is made of hex digits 2, 3 and 4.
*/
const { Blob } = require('overleaf-editor-core')
const { ObjectId, Binary } = require('mongodb')
const assert = require('../assert')
const mongodb = require('../mongodb')
const MAX_BLOBS_IN_BUCKET = 8
const DUPLICATE_KEY_ERROR_CODE = 11000
/**
* Set up the data structures for a given project.
*/
async function initialize(projectId) {
assert.mongoId(projectId, 'bad projectId')
try {
await mongodb.blobs.insertOne({
_id: ObjectId(projectId),
blobs: {},
})
} catch (err) {
if (err.code !== DUPLICATE_KEY_ERROR_CODE) {
throw err
}
}
}
/**
* Return blob metadata for the given project and hash.
*/
async function findBlob(projectId, hash) {
assert.mongoId(projectId, 'bad projectId')
assert.blobHash(hash, 'bad hash')
const bucket = getBucket(hash)
const result = await mongodb.blobs.findOne(
{ _id: ObjectId(projectId) },
{ projection: { _id: 0, bucket: `$${bucket}` } }
)
if (result?.bucket == null) {
return null
}
const record = result.bucket.find(blob => blob.h.toString('hex') === hash)
if (record == null) {
if (result.bucket.length >= MAX_BLOBS_IN_BUCKET) {
return await findBlobSharded(projectId, hash)
} else {
return null
}
}
return recordToBlob(record)
}
/**
* Search in the sharded collection for blob metadata
*/
async function findBlobSharded(projectId, hash) {
const [shard, bucket] = getShardedBucket(hash)
const id = makeShardedId(projectId, shard)
const result = await mongodb.shardedBlobs.findOne(
{ _id: id },
{ projection: { _id: 0, blobs: `$${bucket}` } }
)
if (result?.blobs == null) {
return null
}
const record = result.blobs.find(blob => blob.h.toString('hex') === hash)
return recordToBlob(record)
}
/**
* Read multiple blob metadata records by hexadecimal hashes.
*/
async function findBlobs(projectId, hashes) {
assert.mongoId(projectId, 'bad projectId')
assert.array(hashes, 'bad hashes: not array')
hashes.forEach(function (hash) {
assert.blobHash(hash, 'bad hash')
})
// Build a set of unique buckets
const buckets = new Set(hashes.map(getBucket))
// Get buckets from Mongo
const projection = { _id: 0 }
for (const bucket of buckets) {
projection[bucket] = 1
}
const result = await mongodb.blobs.findOne(
{ _id: ObjectId(projectId) },
{ projection }
)
if (result?.blobs == null) {
return []
}
// Build blobs from the query results
const hashSet = new Set(hashes)
const blobs = []
for (const bucket of Object.values(result.blobs)) {
for (const record of bucket) {
const hash = record.h.toString('hex')
if (hashSet.has(hash)) {
blobs.push(recordToBlob(record))
hashSet.delete(hash)
}
}
}
// If we haven't found all the blobs, look in the sharded collection
if (hashSet.size > 0) {
const shardedBlobs = await findBlobsSharded(projectId, hashSet)
blobs.push(...shardedBlobs)
}
return blobs
}
/**
* Search in the sharded collection for blob metadata.
*/
async function findBlobsSharded(projectId, hashSet) {
// Build a map of buckets by shard key
const bucketsByShard = new Map()
for (const hash of hashSet) {
const [shard, bucket] = getShardedBucket(hash)
let buckets = bucketsByShard.get(shard)
if (buckets == null) {
buckets = new Set()
bucketsByShard.set(shard, buckets)
}
buckets.add(bucket)
}
// Make parallel requests to the shards that might contain the hashes we want
const requests = []
for (const [shard, buckets] of bucketsByShard.entries()) {
const id = makeShardedId(projectId, shard)
const projection = { _id: 0 }
for (const bucket of buckets) {
projection[bucket] = 1
}
const request = mongodb.shardedBlobs.findOne({ _id: id }, { projection })
requests.push(request)
}
const results = await Promise.all(requests)
// Build blobs from the query results
const blobs = []
for (const result of results) {
if (result?.blobs == null) {
continue
}
for (const bucket of Object.values(result.blobs)) {
for (const record of bucket) {
const hash = record.h.toString('hex')
if (hashSet.has(hash)) {
blobs.push(recordToBlob(record))
}
}
}
}
return blobs
}
/**
* Add a blob's metadata to the blobs collection after it has been uploaded.
*/
async function insertBlob(projectId, blob) {
assert.mongoId(projectId, 'bad projectId')
const hash = blob.getHash()
const bucket = getBucket(hash)
const record = blobToRecord(blob)
const result = await mongodb.blobs.updateOne(
{
_id: ObjectId(projectId),
$expr: {
$lt: [{ $size: { $ifNull: [`$${bucket}`, []] } }, MAX_BLOBS_IN_BUCKET],
},
},
{
$addToSet: { [bucket]: record },
}
)
if (result.matchedCount === 0) {
await insertRecordSharded(projectId, hash, record)
}
}
/**
* Add a blob's metadata to the sharded blobs collection.
*/
async function insertRecordSharded(projectId, hash, record) {
const [shard, bucket] = getShardedBucket(hash)
const id = makeShardedId(projectId, shard)
await mongodb.shardedBlobs.updateOne(
{ _id: id },
{ $addToSet: { [bucket]: record } },
{ upsert: true }
)
}
/**
* Delete all blobs for a given project.
*/
async function deleteBlobs(projectId) {
assert.mongoId(projectId, 'bad projectId')
await mongodb.blobs.deleteOne({ _id: ObjectId(projectId) })
const minShardedId = makeShardedId(projectId, '0')
const maxShardedId = makeShardedId(projectId, 'f')
await mongodb.shardedBlobs.deleteMany({
_id: { $gte: minShardedId, $lte: maxShardedId },
})
}
/**
* Return the Mongo path to the bucket for the given hash.
*/
function getBucket(hash) {
return `blobs.${hash.slice(0, 3)}`
}
/**
* Return the shard key and Mongo path to the bucket for the given hash in the
* sharded collection.
*/
function getShardedBucket(hash) {
const shard = hash.slice(0, 1)
const bucket = `blobs.${hash.slice(1, 4)}`
return [shard, bucket]
}
/**
* Create an _id key for the sharded collection.
*/
function makeShardedId(projectId, shard) {
return new Binary(Buffer.from(`${projectId}0${shard}`, 'hex'))
}
/**
* Return the Mongo record for the given blob.
*/
function blobToRecord(blob) {
const hash = blob.getHash()
const byteLength = blob.getByteLength()
const stringLength = blob.getStringLength()
return {
h: new Binary(Buffer.from(hash, 'hex')),
b: byteLength,
s: stringLength,
}
}
/**
* Create a blob from the given Mongo record.
*/
function recordToBlob(record) {
if (record == null) {
return
}
return new Blob(record.h.toString('hex'), record.b, record.s)
}
module.exports = {
initialize,
findBlob,
findBlobs,
insertBlob,
deleteBlobs,
}
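To make the layout described in the header comment concrete, here is a sketch (not part of the commit) of where a blob with an invented hash ends up, following getBucket, getShardedBucket and makeShardedId above:
// hash: 'abc123def4567890abc123def4567890abc123de' (invented)
//
// Primary collection (projectHistoryBlobs), one document per project:
//   bucket path   'blobs.abc'   -- first three hex digits of the hash
//
// Once that bucket already holds MAX_BLOBS_IN_BUCKET (8) blobs, the record goes
// to the sharded collection (projectHistoryShardedBlobs) instead:
//   shard key     'a'           -- first hex digit of the hash
//   bucket path   'blobs.bc1'   -- hex digits 2 to 4 of the hash
//   document _id  Binary(projectId + '0a' as hex) -- the project id bytes
//                 followed by one byte encoding the shard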

Some files were not shown because too many files have changed in this diff.