Merge pull request #63 from overleaf/spd-object-persistor

Add support for object-persistor
Simon Detheridge 2020-08-11 10:44:25 +01:00 committed by GitHub
commit 0c83e19863
23 changed files with 1791 additions and 1312 deletions
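At a glance, this PR swaps DocArchiveManager's hand-rolled S3 requests for the @overleaf/object-persistor abstraction. A minimal sketch of the persistor calls the diff below introduces, assuming the new PersistorManager and settings modules (illustrative only, not part of the commit):

// Sketch: store-and-fetch round trip using the persistor API adopted in this PR
const crypto = require('crypto')
const Streamifier = require('streamifier')
const settings = require('settings-sharelatex')
const PersistorManager = require('./PersistorManager')

async function roundTrip(key, json) {
  // upload, asking the persistor to verify against our locally computed md5
  const md5 = crypto.createHash('md5').update(json).digest('hex')
  const stream = Streamifier.createReadStream(json)
  await PersistorManager.sendStream(settings.docstore.bucket, key, stream, {
    sourceMd5: md5
  })

  // fetch the checksum and content back so the caller can verify them
  const sourceMd5 = await PersistorManager.getObjectMd5Hash(
    settings.docstore.bucket,
    key
  )
  const readStream = await PersistorManager.getObjectStream(
    settings.docstore.bucket,
    key
  )
  return { sourceMd5, readStream }
}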

View file

@@ -0,0 +1,17 @@
version: 2
updates:
- package-ecosystem: "npm"
directory: "/"
schedule:
interval: "daily"
pull-request-branch-name:
# Separate sections of the branch name with a hyphen
# Docker images use the branch name and do not support slashes in tags
# https://github.com/overleaf/google-ops/issues/822
# https://docs.github.com/en/github/administering-a-repository/configuration-options-for-dependency-updates#pull-request-branch-nameseparator
separator: "-"
# Block informal upgrades -- security upgrades use a separate queue.
# https://docs.github.com/en/github/administering-a-repository/configuration-options-for-dependency-updates#open-pull-requests-limit
open-pull-requests-limit: 0

View file

@@ -15,8 +15,6 @@ RUN npm ci --quiet
COPY . /app
FROM base
COPY --from=app /app /app

View file

@@ -1,131 +0,0 @@
String cron_string = BRANCH_NAME == "master" ? "@daily" : ""
pipeline {
agent any
environment {
GIT_PROJECT = "docstore"
JENKINS_WORKFLOW = "docstore-sharelatex"
TARGET_URL = "${env.JENKINS_URL}blue/organizations/jenkins/${JENKINS_WORKFLOW}/detail/$BRANCH_NAME/$BUILD_NUMBER/pipeline"
GIT_API_URL = "https://api.github.com/repos/overleaf/${GIT_PROJECT}/statuses/$GIT_COMMIT"
}
triggers {
pollSCM('* * * * *')
cron(cron_string)
}
stages {
stage('Install') {
steps {
withCredentials([usernamePassword(credentialsId: 'GITHUB_INTEGRATION', usernameVariable: 'GH_AUTH_USERNAME', passwordVariable: 'GH_AUTH_PASSWORD')]) {
sh "curl $GIT_API_URL \
--data '{ \
\"state\" : \"pending\", \
\"target_url\": \"$TARGET_URL\", \
\"description\": \"Your build is underway\", \
\"context\": \"ci/jenkins\" }' \
-u $GH_AUTH_USERNAME:$GH_AUTH_PASSWORD"
}
}
}
stage('Build') {
steps {
sh 'make build'
}
}
stage('Linting') {
steps {
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make format'
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make lint'
}
}
stage('Unit Tests') {
steps {
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make test_unit'
}
}
stage('Acceptance Tests') {
steps {
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make test_acceptance'
}
}
stage('Package and docker push') {
steps {
sh 'echo ${BUILD_NUMBER} > build_number.txt'
sh 'touch build.tar.gz' // Avoid tar warning about files changing during read
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make tar'
withCredentials([file(credentialsId: 'gcr.io_overleaf-ops', variable: 'DOCKER_REPO_KEY_PATH')]) {
sh 'docker login -u _json_key --password-stdin https://gcr.io/overleaf-ops < ${DOCKER_REPO_KEY_PATH}'
}
sh 'DOCKER_REPO=gcr.io/overleaf-ops make publish'
sh 'docker logout https://gcr.io/overleaf-ops'
}
}
stage('Publish to s3') {
steps {
sh 'echo ${BRANCH_NAME}-${BUILD_NUMBER} > build_number.txt'
withAWS(credentials:'S3_CI_BUILDS_AWS_KEYS', region:"${S3_REGION_BUILD_ARTEFACTS}") {
s3Upload(file:'build.tar.gz', bucket:"${S3_BUCKET_BUILD_ARTEFACTS}", path:"${JOB_NAME}/${BUILD_NUMBER}.tar.gz")
}
withAWS(credentials:'S3_CI_BUILDS_AWS_KEYS', region:"${S3_REGION_BUILD_ARTEFACTS}") {
// The deployment process uses this file to figure out the latest build
s3Upload(file:'build_number.txt', bucket:"${S3_BUCKET_BUILD_ARTEFACTS}", path:"${JOB_NAME}/latest")
}
}
}
}
post {
always {
sh 'DOCKER_COMPOSE_FLAGS="-f docker-compose.ci.yml" make test_clean'
sh 'make clean'
}
success {
withCredentials([usernamePassword(credentialsId: 'GITHUB_INTEGRATION', usernameVariable: 'GH_AUTH_USERNAME', passwordVariable: 'GH_AUTH_PASSWORD')]) {
sh "curl $GIT_API_URL \
--data '{ \
\"state\" : \"success\", \
\"target_url\": \"$TARGET_URL\", \
\"description\": \"Your build succeeded!\", \
\"context\": \"ci/jenkins\" }' \
-u $GH_AUTH_USERNAME:$GH_AUTH_PASSWORD"
}
}
failure {
mail(from: "${EMAIL_ALERT_FROM}",
to: "${EMAIL_ALERT_TO}",
subject: "Jenkins build failed: ${JOB_NAME}:${BUILD_NUMBER}",
body: "Build: ${BUILD_URL}")
withCredentials([usernamePassword(credentialsId: 'GITHUB_INTEGRATION', usernameVariable: 'GH_AUTH_USERNAME', passwordVariable: 'GH_AUTH_PASSWORD')]) {
sh "curl $GIT_API_URL \
--data '{ \
\"state\" : \"failure\", \
\"target_url\": \"$TARGET_URL\", \
\"description\": \"Your build failed\", \
\"context\": \"ci/jenkins\" }' \
-u $GH_AUTH_USERNAME:$GH_AUTH_PASSWORD"
}
}
}
// The options directive is for configuration that applies to the whole job.
options {
// we'd like to make sure we remove old builds, so we don't fill up our storage!
buildDiscarder(logRotator(numToKeepStr:'50'))
// And we'd really like to be sure that this build doesn't hang forever, so let's time it out after:
timeout(time: 30, unit: 'MINUTES')
}
}

View file

@@ -25,13 +25,13 @@ clean:
docker rmi gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
format:
$(DOCKER_COMPOSE) run --rm test_unit npm run format
$(DOCKER_COMPOSE) run --rm test_unit npm run --silent format
format_fix:
$(DOCKER_COMPOSE) run --rm test_unit npm run format:fix
$(DOCKER_COMPOSE) run --rm test_unit npm run --silent format:fix
lint:
$(DOCKER_COMPOSE) run --rm test_unit npm run lint
$(DOCKER_COMPOSE) run --rm test_unit npm run --silent lint
test: format lint test_unit test_acceptance

View file

@@ -1,295 +1,180 @@
/* eslint-disable
camelcase,
handle-callback-err,
no-useless-escape,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DocArchive
const MongoManager = require('./MongoManager')
const { callbackify } = require('util')
const MongoManager = require('./MongoManager').promises
const Errors = require('./Errors')
const logger = require('logger-sharelatex')
const _ = require('underscore')
const async = require('async')
const settings = require('settings-sharelatex')
const request = require('request')
const crypto = require('crypto')
const Streamifier = require('streamifier')
const RangeManager = require('./RangeManager')
const thirtySeconds = 30 * 1000
const PersistorManager = require('./PersistorManager')
const asyncPool = require('tiny-async-pool')
module.exports = DocArchive = {
archiveAllDocs(project_id, callback) {
if (callback == null) {
callback = function (err, docs) {}
}
return MongoManager.getProjectsDocs(
project_id,
{ include_deleted: true },
{ lines: true, ranges: true, rev: true, inS3: true },
function (err, docs) {
if (err != null) {
return callback(err)
} else if (docs == null) {
return callback(
new Errors.NotFoundError(`No docs for project ${project_id}`)
)
}
docs = _.filter(docs, (doc) => doc.inS3 !== true)
const jobs = _.map(docs, (doc) => (cb) =>
DocArchive.archiveDoc(project_id, doc, cb)
)
return async.parallelLimit(jobs, 5, callback)
}
)
},
const PARALLEL_JOBS = 5
archiveDoc(project_id, doc, callback) {
let options
logger.log({ project_id, doc_id: doc._id }, 'sending doc to s3')
try {
options = DocArchive.buildS3Options(project_id + '/' + doc._id)
} catch (e) {
return callback(e)
}
return DocArchive._mongoDocToS3Doc(doc, function (error, json_doc) {
if (error != null) {
return callback(error)
}
options.body = json_doc
options.headers = { 'Content-Type': 'application/json' }
return request.put(options, function (err, res) {
if (err != null || res.statusCode !== 200) {
logger.err(
{
err,
res,
project_id,
doc_id: doc._id,
statusCode: res != null ? res.statusCode : undefined
},
'something went wrong archiving doc in aws'
)
return callback(new Error('Error in S3 request'))
}
const md5lines = crypto
.createHash('md5')
.update(json_doc, 'utf8')
.digest('hex')
const md5response = res.headers.etag.toString().replace(/\"/g, '')
if (md5lines !== md5response) {
logger.err(
{
responseMD5: md5response,
linesMD5: md5lines,
project_id,
doc_id: doc != null ? doc._id : undefined
},
'err in response md5 from s3'
)
return callback(new Error('Error in S3 md5 response'))
}
return MongoManager.markDocAsArchived(doc._id, doc.rev, function (err) {
if (err != null) {
return callback(err)
}
return callback()
})
})
})
},
unArchiveAllDocs(project_id, callback) {
if (callback == null) {
callback = function (err) {}
}
return MongoManager.getArchivedProjectDocs(project_id, function (
err,
docs
) {
if (err != null) {
logger.err({ err, project_id }, 'error unarchiving all docs')
return callback(err)
} else if (docs == null) {
return callback(
new Errors.NotFoundError(`No docs for project ${project_id}`)
)
}
const jobs = _.map(
docs,
(doc) =>
function (cb) {
if (doc.inS3 == null) {
return cb()
} else {
return DocArchive.unarchiveDoc(project_id, doc._id, cb)
}
}
)
return async.parallelLimit(jobs, 5, callback)
})
},
unarchiveDoc(project_id, doc_id, callback) {
let options
logger.log({ project_id, doc_id }, 'getting doc from s3')
try {
options = DocArchive.buildS3Options(project_id + '/' + doc_id)
} catch (e) {
return callback(e)
}
options.json = true
return request.get(options, function (err, res, doc) {
if (err != null || res.statusCode !== 200) {
logger.err(
{ err, res, project_id, doc_id },
'something went wrong unarchiving doc from aws'
)
return callback(new Errors.NotFoundError('Error in S3 request'))
}
return DocArchive._s3DocToMongoDoc(doc, function (error, mongo_doc) {
if (error != null) {
return callback(error)
}
return MongoManager.upsertIntoDocCollection(
project_id,
doc_id.toString(),
mongo_doc,
function (err) {
if (err != null) {
return callback(err)
}
logger.log({ project_id, doc_id }, 'deleting doc from s3')
return DocArchive._deleteDocFromS3(project_id, doc_id, callback)
}
)
})
})
},
destroyAllDocs(project_id, callback) {
if (callback == null) {
callback = function (err) {}
}
return MongoManager.getProjectsDocs(
project_id,
{ include_deleted: true },
{ _id: 1 },
function (err, docs) {
if (err != null) {
logger.err({ err, project_id }, "error getting project's docs")
return callback(err)
} else if (docs == null) {
return callback()
}
const jobs = _.map(docs, (doc) => (cb) =>
DocArchive.destroyDoc(project_id, doc._id, cb)
)
return async.parallelLimit(jobs, 5, callback)
}
)
},
destroyDoc(project_id, doc_id, callback) {
logger.log({ project_id, doc_id }, 'removing doc from mongo and s3')
return MongoManager.findDoc(project_id, doc_id, { inS3: 1 }, function (
error,
doc
) {
if (error != null) {
return callback(error)
}
if (doc == null) {
return callback(new Errors.NotFoundError('Doc not found in Mongo'))
}
if (doc.inS3 === true) {
return DocArchive._deleteDocFromS3(project_id, doc_id, function (err) {
if (err != null) {
return err
}
return MongoManager.destroyDoc(doc_id, callback)
})
} else {
return MongoManager.destroyDoc(doc_id, callback)
}
})
},
_deleteDocFromS3(project_id, doc_id, callback) {
let options
try {
options = DocArchive.buildS3Options(project_id + '/' + doc_id)
} catch (e) {
return callback(e)
}
options.json = true
return request.del(options, function (err, res, body) {
if (err != null || res.statusCode !== 204) {
logger.err(
{ err, res, project_id, doc_id },
'something went wrong deleting doc from aws'
)
return callback(new Error('Error in S3 request'))
}
return callback()
})
},
_s3DocToMongoDoc(doc, callback) {
if (callback == null) {
callback = function (error, mongo_doc) {}
}
const mongo_doc = {}
if (doc.schema_v === 1 && doc.lines != null) {
mongo_doc.lines = doc.lines
if (doc.ranges != null) {
mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
}
} else if (doc instanceof Array) {
mongo_doc.lines = doc
} else {
return callback(new Error("I don't understand the doc format in s3"))
}
return callback(null, mongo_doc)
},
_mongoDocToS3Doc(doc, callback) {
if (callback == null) {
callback = function (error, s3_doc) {}
}
if (doc.lines == null) {
return callback(new Error('doc has no lines'))
}
const json = JSON.stringify({
lines: doc.lines,
ranges: doc.ranges,
schema_v: 1
})
if (json.indexOf('\u0000') !== -1) {
const error = new Error('null bytes detected')
logger.err({ err: error, doc, json }, error.message)
return callback(error)
}
return callback(null, json)
},
buildS3Options(key) {
if (settings.docstore.s3 == null) {
throw new Error('S3 settings are not configured')
}
return {
aws: {
key: settings.docstore.s3.key,
secret: settings.docstore.s3.secret,
bucket: settings.docstore.s3.bucket
},
timeout: thirtySeconds,
uri: `https://${settings.docstore.s3.bucket}.s3.amazonaws.com/${key}`
}
module.exports = {
archiveAllDocs: callbackify(archiveAllDocs),
archiveDoc: callbackify(archiveDoc),
unArchiveAllDocs: callbackify(unArchiveAllDocs),
unarchiveDoc: callbackify(unarchiveDoc),
destroyAllDocs: callbackify(destroyAllDocs),
destroyDoc: callbackify(destroyDoc),
promises: {
archiveAllDocs,
archiveDoc,
unArchiveAllDocs,
unarchiveDoc,
destroyAllDocs,
destroyDoc
}
}
async function archiveAllDocs(projectId) {
const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: true },
{ lines: true, ranges: true, rev: true, inS3: true }
)
if (!docs) {
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
}
const docsToArchive = docs.filter((doc) => !doc.inS3)
if (docsToArchive.length) {
await asyncPool(PARALLEL_JOBS, docsToArchive, (doc) =>
archiveDoc(projectId, doc)
)
}
}
async function archiveDoc(projectId, doc) {
logger.log(
{ project_id: projectId, doc_id: doc._id },
'sending doc to persistor'
)
const key = `${projectId}/${doc._id}`
if (doc.lines == null) {
throw new Error('doc has no lines')
}
const json = JSON.stringify({
lines: doc.lines,
ranges: doc.ranges,
schema_v: 1
})
// this should never happen, but protects against memory-corruption errors that
// have happened in the past
if (json.indexOf('\u0000') > -1) {
const error = new Error('null bytes detected')
logger.err({ err: error, doc }, error.message)
throw error
}
const md5 = crypto.createHash('md5').update(json).digest('hex')
const stream = Streamifier.createReadStream(json)
await PersistorManager.sendStream(settings.docstore.bucket, key, stream, {
sourceMd5: md5
})
await MongoManager.markDocAsArchived(doc._id, doc.rev)
}
async function unArchiveAllDocs(projectId) {
const docs = await MongoManager.getArchivedProjectDocs(projectId)
if (!docs) {
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
}
if (!docs.length) {
// asyncPool will throw an error with an empty array
return
}
await asyncPool(PARALLEL_JOBS, docs, (doc) =>
unarchiveDoc(projectId, doc._id)
)
}
async function unarchiveDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'getting doc from persistor'
)
const key = `${projectId}/${docId}`
const sourceMd5 = await PersistorManager.getObjectMd5Hash(
settings.docstore.bucket,
key
)
const stream = await PersistorManager.getObjectStream(
settings.docstore.bucket,
key
)
stream.resume()
const json = await _streamToString(stream)
const md5 = crypto.createHash('md5').update(json).digest('hex')
if (sourceMd5 !== md5) {
throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
key,
sourceMd5,
md5
})
}
const doc = JSON.parse(json)
const mongoDoc = {}
if (doc.schema_v === 1 && doc.lines != null) {
mongoDoc.lines = doc.lines
if (doc.ranges != null) {
mongoDoc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
}
} else if (Array.isArray(doc)) {
mongoDoc.lines = doc
} else {
throw new Error("I don't understand the doc format in s3")
}
await MongoManager.upsertIntoDocCollection(projectId, docId, mongoDoc)
await PersistorManager.deleteObject(settings.docstore.bucket, key)
}
async function destroyAllDocs(projectId) {
const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: true },
{ _id: 1 }
)
if (docs && docs.length) {
await asyncPool(PARALLEL_JOBS, docs, (doc) =>
destroyDoc(projectId, doc._id)
)
}
}
async function destroyDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'removing doc from mongo and persistor'
)
const doc = await MongoManager.findDoc(projectId, docId, {
inS3: 1
})
if (!doc) {
throw new Errors.NotFoundError('Doc not found in Mongo')
}
if (doc.inS3) {
await PersistorManager.deleteObject(
settings.docstore.bucket,
`${projectId}/${docId}`
)
}
await MongoManager.destroyDoc(docId)
}
async function _streamToString(stream) {
const chunks = []
return new Promise((resolve, reject) => {
stream.on('data', (chunk) => chunks.push(chunk))
stream.on('error', reject)
stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')))
})
}
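Every public function above is exported twice: wrapped with callbackify for existing callers, and unwrapped under promises for new async code. A hypothetical caller (not part of the diff):

const DocArchiveManager = require('./DocArchiveManager')

// callback style, unchanged for existing consumers
DocArchiveManager.archiveAllDocs(projectId, (err) => {
  if (err) throw err
})

// promise style, available to new code
DocArchiveManager.promises.archiveAllDocs(projectId).then(() => {
  // all docs archived
})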

View file

@@ -1,16 +1,10 @@
/* eslint-disable
no-proto,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
let Errors
var NotFoundError = function (message) {
const error = new Error(message)
error.name = 'NotFoundError'
error.__proto__ = NotFoundError.prototype
return error
}
NotFoundError.prototype.__proto__ = Error.prototype
// import Errors from object-persistor to pass instanceof checks
const OError = require('@overleaf/o-error')
const { Errors } = require('@overleaf/object-persistor')
module.exports = Errors = { NotFoundError }
class Md5MismatchError extends OError {}
module.exports = {
Md5MismatchError,
...Errors
}
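Re-exporting the persistor's error classes means a NotFoundError raised inside @overleaf/object-persistor satisfies instanceof checks against this local Errors module, which the updated acceptance tests below depend on. A hedged sketch (illustrative):

const Errors = require('./Errors')
const PersistorManager = require('./PersistorManager')

async function docExistsInArchive(bucket, key) {
  try {
    await PersistorManager.getObjectStream(bucket, key)
    return true
  } catch (err) {
    if (err instanceof Errors.NotFoundError) {
      return false // missing object, not a transport failure
    }
    throw err
  }
}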

View file

@@ -14,6 +14,7 @@ let MongoManager
const { db, ObjectId } = require('./mongojs')
const logger = require('logger-sharelatex')
const metrics = require('metrics-sharelatex')
const { promisify } = require('util')
module.exports = MongoManager = {
findDoc(project_id, doc_id, filter, callback) {
@@ -162,14 +163,11 @@ module.exports = MongoManager = {
)
}
}
;[
'findDoc',
'getProjectsDocs',
'getArchivedProjectDocs',
'upsertIntoDocCollection',
'markDocAsArchived',
'getDocVersion',
'setDocVersion'
].map((method) =>
const methods = Object.getOwnPropertyNames(MongoManager)
module.exports.promises = {}
for (const method of methods) {
metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger)
)
module.exports.promises[method] = promisify(module.exports[method])
}
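The loop above both instruments every MongoManager method and publishes a promisified twin under module.exports.promises, which is how the rewritten DocArchiveManager consumes it (require('./MongoManager').promises). Illustrative usage:

const MongoManager = require('./MongoManager').promises

async function destroyIfUnarchived(projectId, docId) {
  const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
  if (!doc.inS3) {
    await MongoManager.destroyDoc(docId)
  }
}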

View file

@@ -0,0 +1,9 @@
const settings = require('settings-sharelatex')
const persistorSettings = settings.docstore
persistorSettings.Metrics = require('metrics-sharelatex')
const ObjectPersistor = require('@overleaf/object-persistor')
const persistor = ObjectPersistor(persistorSettings)
module.exports = persistor
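This module hands back a fully configured persistor instance; the backend (S3, GCS, or a fallback chain) is selected entirely by the docstore settings, so callers just require it and go. Illustrative usage, mirroring DocArchiveManager above:

const settings = require('settings-sharelatex')
const PersistorManager = require('./PersistorManager')

async function deleteArchivedDoc(projectId, docId) {
  await PersistorManager.deleteObject(
    settings.docstore.bucket,
    `${projectId}/${docId}`
  )
}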

View file

@@ -1,10 +1,8 @@
docstore
--acceptance-creds=None
--dependencies=mongo
--dependencies=mongo,gcs
--docker-repos=gcr.io/overleaf-ops
--env-add=
--env-pass-through=
--language=es
--node-version=10.21.0
--public-repo=True
--script-version=2.3.0
--script-version=3.2.0

View file

@@ -17,8 +17,15 @@ const Settings = {
mongo: {},
docstore: {
backend: process.env.BACKEND || 's3',
healthCheck: {
project_id: process.env.HEALTH_CHECK_PROJECT_ID
},
bucket: process.env.BUCKET_NAME || process.env.AWS_BUCKET || 'bucket',
gcs: {
unlockBeforeDelete: process.env.GCS_UNLOCK_BEFORE_DELETE === 'true',
deletedBucketSuffix: process.env.GCS_DELETED_BUCKET_SUFFIX,
deleteConcurrency: parseInt(process.env.GCS_DELETE_CONCURRENCY) || 50
}
},
@@ -41,7 +48,28 @@ if (
Settings.docstore.s3 = {
key: process.env.AWS_ACCESS_KEY_ID,
secret: process.env.AWS_SECRET_ACCESS_KEY,
bucket: process.env.AWS_BUCKET
bucket: process.env.AWS_BUCKET,
endpoint: process.env.AWS_S3_ENDPOINT,
pathStyle: process.env.AWS_S3_PATH_STYLE,
partSize: parseInt(process.env.AWS_S3_PARTSIZE) || 100 * 1024 * 1024
}
}
if (process.env.GCS_API_ENDPOINT) {
Settings.docstore.gcs.endpoint = {
apiEndpoint: process.env.GCS_API_ENDPOINT,
apiScheme: process.env.GCS_API_SCHEME,
projectId: process.env.GCS_PROJECT_ID
}
}
if (process.env.FALLBACK_BACKEND) {
Settings.docstore.fallback = {
backend: process.env.FALLBACK_BACKEND,
// mapping of bucket names on the fallback, to bucket names on the primary.
// e.g. { myS3UserFilesBucketName: 'myGoogleUserFilesBucketName' }
buckets: JSON.parse(process.env.FALLBACK_BUCKET_MAPPING || '{}'),
copyOnMiss: process.env.COPY_ON_MISS === 'true'
}
}
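As a worked example (hypothetical values, not from the commit), starting the service with FALLBACK_BACKEND=s3, FALLBACK_BUCKET_MAPPING='{"my-s3-docs": "my-gcs-docs"}' and COPY_ON_MISS=true produces:

Settings.docstore.fallback = {
  backend: 's3',
  // fallback (S3) bucket name -> primary bucket name, per the comment above
  buckets: { 'my-s3-docs': 'my-gcs-docs' },
  copyOnMiss: true
}

so reads that miss the primary backend are retried against S3 and, with copyOnMiss set, copied up to the primary on the way through.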

View file

@@ -11,6 +11,7 @@ services:
command: npm run test:unit:_run
environment:
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
test_acceptance:
@@ -24,11 +25,18 @@ services:
REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
GCS_API_ENDPOINT: gcs:9090
GCS_API_SCHEME: http
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
depends_on:
mongo:
condition: service_healthy
gcs:
condition: service_healthy
user: node
command: npm run test:acceptance:_run
@@ -42,3 +50,7 @@ services:
user: root
mongo:
image: mongo:3.6
gcs:
build:
context: test/acceptance/deps
dockerfile: Dockerfile.fake-gcs

View file

@@ -13,7 +13,8 @@ services:
environment:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
command: npm run test:unit
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
@@ -29,15 +30,26 @@ services:
REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
GCS_API_ENDPOINT: gcs:9090
GCS_API_SCHEME: http
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ERROR
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
condition: service_healthy
command: npm run test:acceptance
gcs:
condition: service_healthy
command: npm run --silent test:acceptance
mongo:
image: mongo:3.6
gcs:
build:
context: test/acceptance/deps
dockerfile: Dockerfile.fake-gcs

View file

@@ -8,7 +8,6 @@
"execMap": {
"js": "npm run start"
},
"watch": [
"app/js/",
"app.js",

File diff suppressed because it is too large

View file

@@ -14,45 +14,51 @@
"test:unit:_run": "mocha --recursive --reporter spec $@ test/unit/js",
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"nodemon": "nodemon --config nodemon.json",
"lint": "node_modules/.bin/eslint .",
"lint": "node_modules/.bin/eslint --max-warnings 0 .",
"format": "node_modules/.bin/prettier-eslint $PWD'/**/*.js' --list-different",
"format:fix": "node_modules/.bin/prettier-eslint $PWD'/**/*.js' --write"
},
"dependencies": {
"@overleaf/object-persistor": "git+https://github.com/overleaf/object-persistor.git",
"async": "^2.6.3",
"body-parser": "^1.19.0",
"express": "^4.17.1",
"logger-sharelatex": "^2.1.0",
"metrics-sharelatex": "^2.6.2",
"logger-sharelatex": "^2.1.1",
"metrics-sharelatex": "^2.7.0",
"mongojs": "3.1.0",
"settings-sharelatex": "^1.1.0",
"underscore": "~1.9.2"
"streamifier": "^0.1.1",
"tiny-async-pool": "^1.1.0",
"underscore": "~1.10.2"
},
"devDependencies": {
"@google-cloud/storage": "^5.1.2",
"babel-eslint": "^10.1.0",
"bunyan": "~1.8.12",
"bunyan": "~1.8.14",
"chai": "~4.2.0",
"chai-as-promised": "^7.1.1",
"eslint": "^6.8.0",
"eslint-config-prettier": "^6.10.0",
"eslint-config-standard": "^14.1.0",
"eslint-config-prettier": "^6.11.0",
"eslint-config-standard": "^14.1.1",
"eslint-config-standard-jsx": "^8.1.0",
"eslint-config-standard-react": "^9.2.0",
"eslint-plugin-chai-expect": "^2.1.0",
"eslint-plugin-chai-friendly": "^0.5.0",
"eslint-plugin-import": "^2.20.1",
"eslint-plugin-jsx-a11y": "^6.2.3",
"eslint-plugin-chai-expect": "^2.2.0",
"eslint-plugin-chai-friendly": "^0.6.0",
"eslint-plugin-import": "^2.22.0",
"eslint-plugin-jsx-a11y": "^6.3.1",
"eslint-plugin-mocha": "^6.3.0",
"eslint-plugin-node": "^11.0.0",
"eslint-plugin-prettier": "^3.1.2",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-prettier": "^3.1.4",
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-react": "^7.19.0",
"eslint-plugin-react": "^7.20.3",
"eslint-plugin-standard": "^4.0.1",
"mocha": "^7.1.1",
"prettier": "^2.0.0",
"mocha": "^7.2.0",
"prettier": "^2.0.5",
"prettier-eslint-cli": "^5.0.0",
"request": "~2.88.2",
"sandboxed-module": "~2.0.3",
"sinon": "~9.0.1"
"sandboxed-module": "~2.0.4",
"sinon": "~9.0.2",
"sinon-chai": "^3.5.0"
},
"engines": {
"node": "~6.14.1"

View file

@@ -0,0 +1,5 @@
FROM fsouza/fake-gcs-server:latest
RUN apk add --update --no-cache curl
COPY healthcheck.sh /healthcheck.sh
HEALTHCHECK --interval=1s --timeout=1s --retries=30 CMD /healthcheck.sh http://localhost:9090
CMD ["--port=9090", "--scheme=http"]

View file

@@ -0,0 +1,9 @@
#!/bin/sh
# health check to allow 404 status code as valid
STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" $1)
# will be 000 on non-http error (e.g. connection failure)
if test $STATUSCODE -ge 500 || test $STATUSCODE -lt 200; then
exit 1
fi
exit 0

View file

@@ -12,22 +12,37 @@
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const sinon = require('sinon')
const chai = require('chai')
const should = chai.should()
const { db, ObjectId, ISODate } = require('../../../app/js/mongojs')
const async = require('async')
process.env.BACKEND = 'gcs'
const Settings = require('settings-sharelatex')
const DocArchiveManager = require('../../../app/js/DocArchiveManager.js')
const request = require('request')
const chai = require('chai')
const { expect } = chai
const should = chai.should()
const { db, ObjectId } = require('../../../app/js/mongojs')
const async = require('async')
const DocstoreApp = require('./helpers/DocstoreApp')
const DocstoreClient = require('./helpers/DocstoreClient')
const { Storage } = require('@google-cloud/storage')
const Persistor = require('../../../app/js/PersistorManager')
const Streamifier = require('streamifier')
function uploadContent(path, json, callback) {
const stream = Streamifier.createReadStream(JSON.stringify(json))
Persistor.sendStream(Settings.docstore.bucket, path, stream)
.then(() => callback())
.catch(callback)
}
describe('Archiving', function () {
before(function (done) {
return DocstoreApp.ensureRunning(done)
})
before(async function () {
const storage = new Storage(Settings.docstore.gcs.endpoint)
await storage.createBucket(Settings.docstore.bucket)
await storage.createBucket(`${Settings.docstore.bucket}-deleted`)
})
describe('multiple docs in a project', function () {
before(function (done) {
this.project_id = ObjectId()
@@ -99,10 +114,10 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
s3_doc.lines.should.deep.equal(doc.lines)
s3_doc.ranges.should.deep.equal(doc.ranges)
return callback()
callback()
}
)
}
@@ -215,7 +230,7 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
if (error != null) {
throw error
}
@@ -316,7 +331,7 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
if (error != null) {
throw error
}
@@ -790,7 +805,7 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
if (error != null) {
throw error
}
@@ -909,7 +924,7 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
if (error != null) {
throw error
}
@@ -1006,7 +1021,7 @@ describe('Archiving', function () {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc._id,
(error, res, s3_doc) => {
(error, s3_doc) => {
if (error != null) {
throw error
}
@@ -1054,39 +1069,36 @@ describe('Archiving', function () {
ranges: {},
version: 2
}
const options = DocArchiveManager.buildS3Options(
`${this.project_id}/${this.doc._id}`
)
options.json = this.doc.lines
return request.put(options, (error, res, body) => {
if (error != null) {
throw error
}
res.statusCode.should.equal(200)
return db.docs.insert(
{
project_id: this.project_id,
_id: this.doc._id,
rev: this.doc.version,
inS3: true
},
(error) => {
if (error != null) {
throw error
}
return DocstoreClient.getAllDocs(
this.project_id,
(error, res, fetched_docs) => {
this.fetched_docs = fetched_docs
if (error != null) {
throw error
}
return done()
uploadContent(
`${this.project_id}/${this.doc._id}`,
this.doc.lines,
(error) => {
expect(error).not.to.exist
db.docs.insert(
{
project_id: this.project_id,
_id: this.doc._id,
rev: this.doc.version,
inS3: true
},
(error) => {
if (error != null) {
throw error
}
)
}
)
})
DocstoreClient.getAllDocs(
this.project_id,
(error, res, fetched_docs) => {
this.fetched_docs = fetched_docs
if (error != null) {
throw error
}
return done()
}
)
}
)
}
)
})
it('should restore the doc to mongo', function (done) {

View file

@@ -11,12 +11,12 @@
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const sinon = require('sinon')
const chai = require('chai')
chai.should()
const { db, ObjectId } = require('../../../app/js/mongojs')
const { expect } = chai
const DocstoreApp = require('./helpers/DocstoreApp')
const Errors = require('../../../app/js/Errors')
const DocstoreClient = require('./helpers/DocstoreClient')
@@ -143,17 +143,10 @@ describe("Destroying a project's documents", function () {
})
return it('should remove the doc contents from s3', function (done) {
return DocstoreClient.getS3Doc(
this.project_id,
this.doc_id,
(error, res, s3_doc) => {
if (error != null) {
throw error
}
expect(res.statusCode).to.equal(404)
return done()
}
)
return DocstoreClient.getS3Doc(this.project_id, this.doc_id, (error) => {
expect(error).to.be.instanceOf(Errors.NotFoundError)
done()
})
})
})
})

View file

@@ -13,9 +13,23 @@
*/
let DocstoreClient
const request = require('request').defaults({ jar: false })
const { db, ObjectId } = require('../../../../app/js/mongojs')
const settings = require('settings-sharelatex')
const DocArchiveManager = require('../../../../app/js/DocArchiveManager.js')
const Persistor = require('../../../../app/js/PersistorManager')
async function streamToString(stream) {
const chunks = []
return new Promise((resolve, reject) => {
stream.on('data', (chunk) => chunks.push(chunk))
stream.on('error', reject)
stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')))
})
}
async function getStringFromPersistor(persistor, bucket, key) {
const stream = await persistor.getObjectStream(bucket, key, {})
stream.resume()
return streamToString(stream)
}
module.exports = DocstoreClient = {
createDoc(project_id, doc_id, lines, version, ranges, callback) {
@@ -55,7 +69,9 @@ module.exports = DocstoreClient = {
url: `http://localhost:${settings.internal.docstore.port}/project/${project_id}/doc`,
json: true
},
callback
(req, res, body) => {
callback(req, res, body)
}
)
},
@@ -126,11 +142,14 @@
},
getS3Doc(project_id, doc_id, callback) {
if (callback == null) {
callback = function (error, res, body) {}
}
const options = DocArchiveManager.buildS3Options(project_id + '/' + doc_id)
options.json = true
return request.get(options, callback)
getStringFromPersistor(
Persistor,
settings.docstore.bucket,
`${project_id}/${doc_id}`
)
.then((data) => {
callback(null, JSON.parse(data))
})
.catch(callback)
}
}

View file

@@ -1,47 +1,59 @@
/* eslint-disable
camelcase,
handle-callback-err,
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { assert } = require('chai')
const sinon = require('sinon')
const chai = require('chai')
const should = chai.should()
const { expect } = chai
const modulePath = '../../../app/js/DocArchiveManager.js'
const SandboxedModule = require('sandboxed-module')
const { ObjectId } = require('mongojs')
const Errors = require('../../../app/js/Errors')
const crypto = require('crypto')
chai.use(require('chai-as-promised'))
chai.use(require('sinon-chai'))
describe('DocArchiveManager', function () {
let DocArchiveManager,
PersistorManager,
MongoManager,
RangeManager,
Settings,
Logger,
Crypto,
Streamifier,
HashDigest,
HashUpdate,
archivedDocs,
mongoDocs,
docJson,
md5Sum,
projectId,
readStream,
stream
beforeEach(function () {
this.settings = {
md5Sum = 'decafbad'
RangeManager = {
jsonRangesToMongo: sinon.stub().returns({ mongo: 'ranges' })
}
Settings = {
docstore: {
s3: {
secret: 'secret',
key: 'this_key',
bucket: 'doc-archive-unit-test'
}
bucket: 'wombat'
}
}
this.request = {
put: {},
get: {},
del: {}
Logger = {
log: sinon.stub(),
err: sinon.stub()
}
HashDigest = sinon.stub().returns(md5Sum)
HashUpdate = sinon.stub().returns({ digest: HashDigest })
Crypto = {
createHash: sinon.stub().returns({ update: HashUpdate })
}
Streamifier = {
createReadStream: sinon.stub().returns({ stream: 'readStream' })
}
this.archivedDocs = [
projectId = ObjectId()
archivedDocs = [
{
_id: ObjectId(),
inS3: true,
@@ -58,8 +70,7 @@ describe('DocArchiveManager', function () {
rev: 6
}
]
this.mongoDocs = [
mongoDocs = [
{
_id: ObjectId(),
lines: ['one', 'two', 'three'],
@@ -87,488 +98,427 @@ describe('DocArchiveManager', function () {
}
]
this.unarchivedDocs = [
{
_id: ObjectId(),
lines: ['wombat', 'potato', 'banana'],
rev: 2
},
{
_id: ObjectId(),
lines: ['llama', 'turnip', 'apple'],
rev: 4
},
{
_id: ObjectId(),
lines: ['elephant', 'swede', 'nectarine'],
rev: 6
}
]
docJson = JSON.stringify({
lines: mongoDocs[0].lines,
ranges: mongoDocs[0].ranges,
schema_v: 1
})
this.mixedDocs = this.archivedDocs.concat(this.unarchivedDocs)
stream = {
on: sinon.stub(),
resume: sinon.stub()
}
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
stream.on.withArgs('end').yields()
this.MongoManager = {
markDocAsArchived: sinon.stub().callsArgWith(2, null),
upsertIntoDocCollection: sinon.stub().callsArgWith(3, null),
getProjectsDocs: sinon.stub().callsArgWith(3, null, this.mongoDocs),
getArchivedProjectDocs: sinon.stub().callsArgWith(2, null, this.mongoDocs)
readStream = {
stream: 'readStream'
}
this.requires = {
'settings-sharelatex': this.settings,
'./MongoManager': this.MongoManager,
request: this.request,
'./RangeManager': (this.RangeManager = {}),
'logger-sharelatex': {
log() {},
err() {}
PersistorManager = {
getObjectStream: sinon.stub().resolves(stream),
sendStream: sinon.stub().resolves(),
getObjectMd5Hash: sinon.stub().resolves(md5Sum),
deleteObject: sinon.stub().resolves()
}
MongoManager = {
promises: {
markDocAsArchived: sinon.stub().resolves(),
upsertIntoDocCollection: sinon.stub().resolves(),
getProjectsDocs: sinon.stub().resolves(mongoDocs),
getArchivedProjectDocs: sinon.stub().resolves(archivedDocs),
findDoc: sinon.stub().resolves(),
destroyDoc: sinon.stub().resolves()
}
}
this.globals = { JSON }
for (const mongoDoc of mongoDocs) {
MongoManager.promises.findDoc
.withArgs(projectId, mongoDoc._id)
.resolves(mongoDoc)
}
this.error = 'my error'
this.project_id = ObjectId().toString()
this.stubbedError = new Errors.NotFoundError('Error in S3 request')
return (this.DocArchiveManager = SandboxedModule.require(modulePath, {
requires: this.requires,
globals: this.globals
}))
DocArchiveManager = SandboxedModule.require(modulePath, {
requires: {
'settings-sharelatex': Settings,
'logger-sharelatex': Logger,
crypto: Crypto,
streamifier: Streamifier,
'./MongoManager': MongoManager,
'./RangeManager': RangeManager,
'./PersistorManager': PersistorManager,
'./Errors': Errors
},
globals: {
console,
JSON
}
})
})
describe('archiveDoc', function () {
it('should use correct options', function (done) {
this.request.put = sinon
.stub()
.callsArgWith(1, null, { statusCode: 200, headers: { etag: '' } })
return this.DocArchiveManager.archiveDoc(
this.project_id,
this.mongoDocs[0],
(err) => {
const opts = this.request.put.args[0][0]
assert.deepEqual(opts.aws, {
key: this.settings.docstore.s3.key,
secret: this.settings.docstore.s3.secret,
bucket: this.settings.docstore.s3.bucket
})
opts.body.should.equal(
JSON.stringify({
lines: this.mongoDocs[0].lines,
ranges: this.mongoDocs[0].ranges,
schema_v: 1
})
)
opts.timeout.should.equal(30 * 1000)
opts.uri.should.equal(
`https://${this.settings.docstore.s3.bucket}.s3.amazonaws.com/${this.project_id}/${this.mongoDocs[0]._id}`
)
return done()
}
it('should resolve when passed a valid document', async function () {
await expect(
DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
).to.eventually.be.fulfilled
})
it('should throw an error if the doc has no lines', async function () {
const doc = mongoDocs[0]
doc.lines = null
await expect(
DocArchiveManager.promises.archiveDoc(projectId, doc)
).to.eventually.be.rejectedWith('doc has no lines')
})
it('should add the schema version', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[1])
expect(Streamifier.createReadStream).to.have.been.calledWith(
sinon.match(/"schema_v":1/)
)
})
it('should return no md5 error', function (done) {
const data = JSON.stringify({
lines: this.mongoDocs[0].lines,
ranges: this.mongoDocs[0].ranges,
it('should calculate the hex md5 sum of the content', async function () {
const json = JSON.stringify({
lines: mongoDocs[0].lines,
ranges: mongoDocs[0].ranges,
schema_v: 1
})
this.md5 = crypto.createHash('md5').update(data).digest('hex')
this.request.put = sinon
.stub()
.callsArgWith(1, null, { statusCode: 200, headers: { etag: this.md5 } })
return this.DocArchiveManager.archiveDoc(
this.project_id,
this.mongoDocs[0],
(err) => {
should.not.exist(err)
return done()
}
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(Crypto.createHash).to.have.been.calledWith('md5')
expect(HashUpdate).to.have.been.calledWith(json)
expect(HashDigest).to.have.been.calledWith('hex')
})
it('should pass the md5 hash to the object persistor for verification', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(PersistorManager.sendStream).to.have.been.calledWith(
sinon.match.any,
sinon.match.any,
sinon.match.any,
{ sourceMd5: md5Sum }
)
})
return it('should return the error', function (done) {
this.request.put = sinon.stub().callsArgWith(1, this.stubbedError, {
statusCode: 400,
headers: { etag: '' }
})
return this.DocArchiveManager.archiveDoc(
this.project_id,
this.mongoDocs[0],
(err) => {
should.exist(err)
return done()
}
it('should pass the correct bucket and key to the persistor', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(PersistorManager.sendStream).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${mongoDocs[0]._id}`
)
})
it('should create a stream from the encoded json and send it', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(Streamifier.createReadStream).to.have.been.calledWith(docJson)
expect(PersistorManager.sendStream).to.have.been.calledWith(
sinon.match.any,
sinon.match.any,
readStream
)
})
it('should mark the doc as archived', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
mongoDocs[0]._id,
mongoDocs[0].rev
)
})
describe('with null bytes in the result', function () {
const _stringify = JSON.stringify
beforeEach(function () {
JSON.stringify = sinon.stub().returns('{"bad": "\u0000"}')
})
afterEach(function () {
JSON.stringify = _stringify
})
it('should return an error', async function () {
await expect(
DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
).to.eventually.be.rejectedWith('null bytes detected')
})
})
})
describe('unarchiveDoc', function () {
it('should use correct options', function (done) {
this.request.get = sinon
.stub()
.callsArgWith(1, null, { statusCode: 200 }, this.mongoDocs[0].lines)
this.request.del = sinon
.stub()
.callsArgWith(1, null, { statusCode: 204 }, {})
return this.DocArchiveManager.unarchiveDoc(
this.project_id,
this.mongoDocs[0]._id,
(err) => {
const opts = this.request.get.args[0][0]
assert.deepEqual(opts.aws, {
key: this.settings.docstore.s3.key,
secret: this.settings.docstore.s3.secret,
bucket: this.settings.docstore.s3.bucket
})
opts.json.should.equal(true)
opts.timeout.should.equal(30 * 1000)
opts.uri.should.equal(
`https://${this.settings.docstore.s3.bucket}.s3.amazonaws.com/${this.project_id}/${this.mongoDocs[0]._id}`
let docId
beforeEach(function () {
docId = mongoDocs[0]._id
})
it('should resolve when passed a valid document', async function () {
await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId)).to
.eventually.be.fulfilled
})
it('should throw an error if the md5 does not match', async function () {
PersistorManager.getObjectMd5Hash.resolves('badf00d')
await expect(
DocArchiveManager.promises.unarchiveDoc(projectId, docId)
).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError)
})
it('should update the doc lines in mongo', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, { lines: mongoDocs[0].lines })
})
it('should delete the doc in s3', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
)
})
describe('doc contents', function () {
let mongoDoc, s3Doc
describe('when the doc has the old schema', function () {
beforeEach(function () {
mongoDoc = {
lines: ['doc', 'lines']
}
s3Doc = ['doc', 'lines']
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
})
it('should return the docs lines', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
})
})
describe('with the new schema and ranges', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
ranges: { json: 'ranges' },
schema_v: 1
}
mongoDoc = {
lines: ['doc', 'lines'],
ranges: { mongo: 'ranges' }
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
})
it('should return the doc lines and ranges', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
})
})
describe('with the new schema and no ranges', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
schema_v: 1
}
mongoDoc = {
lines: ['doc', 'lines']
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
})
it('should return only the doc lines', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
})
})
describe('with an unrecognised schema', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
schema_v: 2
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
})
it('should throw an error', async function () {
await expect(
DocArchiveManager.promises.unarchiveDoc(projectId, docId)
).to.eventually.be.rejectedWith(
"I don't understand the doc format in s3"
)
return done()
}
)
})
})
})
})
describe('destroyDoc', function () {
let docId
beforeEach(function () {
docId = mongoDocs[0]._id
})
it('should return the error', function (done) {
this.request.get = sinon.stub().callsArgWith(1, this.stubbedError, {}, {})
return this.DocArchiveManager.unarchiveDoc(
this.project_id,
this.mongoDocs[0],
(err) => {
should.exist(err)
return done()
}
)
it('should resolve when passed a valid document', async function () {
await expect(DocArchiveManager.promises.destroyDoc(projectId, docId)).to
.eventually.be.fulfilled
})
return it('should error if the doc lines are a string not an array', function (done) {
this.request.get = sinon
.stub()
.callsArgWith(1, null, { statusCode: 200 }, 'this is a string')
this.request.del = sinon.stub()
return this.DocArchiveManager.unarchiveDoc(
this.project_id,
this.mongoDocs[0],
(err) => {
should.exist(err)
this.request.del.called.should.equal(false)
return done()
}
)
it('should throw a not found error when there is no document', async function () {
await expect(
DocArchiveManager.promises.destroyDoc(projectId, 'wombat')
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
})
describe('when the doc is in s3', function () {
beforeEach(function () {
mongoDocs[0].inS3 = true
})
it('should delete the document from s3, if it is in s3', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
)
})
it('should delete the doc in mongo', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
})
})
describe('when the doc is not in s3', function () {
beforeEach(function () {
mongoDocs[0].inS3 = false
})
it('should not delete the document from s3, if it is not in s3', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
expect(PersistorManager.deleteObject).not.to.have.been.called
})
it('should delete the doc in mongo', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
})
})
})
describe('archiveAllDocs', function () {
it('should archive all project docs which are not in s3', function (done) {
this.MongoManager.getProjectsDocs = sinon
.stub()
.callsArgWith(3, null, this.mongoDocs)
this.DocArchiveManager.archiveDoc = sinon.stub().callsArgWith(2, null)
return this.DocArchiveManager.archiveAllDocs(this.project_id, (err) => {
this.DocArchiveManager.archiveDoc
.calledWith(this.project_id, this.mongoDocs[0])
.should.equal(true)
this.DocArchiveManager.archiveDoc
.calledWith(this.project_id, this.mongoDocs[1])
.should.equal(true)
this.DocArchiveManager.archiveDoc
.calledWith(this.project_id, this.mongoDocs[4])
.should.equal(true)
this.DocArchiveManager.archiveDoc
.calledWith(this.project_id, this.mongoDocs[2])
.should.equal(false)
this.DocArchiveManager.archiveDoc
.calledWith(this.project_id, this.mongoDocs[3])
.should.equal(false)
should.not.exist(err)
return done()
})
it('should resolve with valid arguments', async function () {
await expect(DocArchiveManager.promises.archiveAllDocs(projectId)).to
.eventually.be.fulfilled
})
it('should return error if have no docs', function (done) {
this.MongoManager.getProjectsDocs = sinon
.stub()
.callsArgWith(3, null, null)
it('should archive all project docs which are not in s3', async function () {
await DocArchiveManager.promises.archiveAllDocs(projectId)
// not inS3
expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
mongoDocs[0]._id
)
expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
mongoDocs[1]._id
)
expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
mongoDocs[4]._id
)
return this.DocArchiveManager.archiveAllDocs(this.project_id, (err) => {
should.exist(err)
return done()
})
// inS3
expect(
MongoManager.promises.markDocAsArchived
).not.to.have.been.calledWith(mongoDocs[2]._id)
expect(
MongoManager.promises.markDocAsArchived
).not.to.have.been.calledWith(mongoDocs[3]._id)
})
it('should return the error', function (done) {
this.MongoManager.getProjectsDocs = sinon
.stub()
.callsArgWith(3, this.error, null)
it('should return error if the project has no docs', async function () {
MongoManager.promises.getProjectsDocs.resolves(null)
return this.DocArchiveManager.archiveAllDocs(this.project_id, (err) => {
err.should.equal(this.error)
return done()
})
})
return describe('when most have been already put in s3', function () {
beforeEach(function () {
let numberOfDocs = 10 * 1000
this.mongoDocs = []
while (--numberOfDocs !== 0) {
this.mongoDocs.push({ inS3: true, _id: ObjectId() })
}
this.MongoManager.getProjectsDocs = sinon
.stub()
.callsArgWith(3, null, this.mongoDocs)
return (this.DocArchiveManager.archiveDoc = sinon
.stub()
.callsArgWith(2, null))
})
return it('should not throw an error', function (done) {
return this.DocArchiveManager.archiveAllDocs(this.project_id, (err) => {
should.not.exist(err)
return done()
})
})
await expect(
DocArchiveManager.promises.archiveAllDocs(projectId)
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
})
})
describe('unArchiveAllDocs', function () {
it('should unarchive all inS3 docs', function (done) {
this.MongoManager.getArchivedProjectDocs = sinon
.stub()
.callsArgWith(1, null, this.archivedDocs)
this.DocArchiveManager.unarchiveDoc = sinon.stub().callsArgWith(2, null)
return this.DocArchiveManager.unArchiveAllDocs(this.project_id, (err) => {
for (const doc of Array.from(this.archivedDocs)) {
this.DocArchiveManager.unarchiveDoc
.calledWith(this.project_id, doc._id)
.should.equal(true)
}
should.not.exist(err)
return done()
})
it('should resolve with valid arguments', async function () {
await expect(DocArchiveManager.promises.unArchiveAllDocs(projectId)).to
.eventually.be.fulfilled
})
it('should return error if have no docs', function (done) {
this.MongoManager.getArchivedProjectDocs = sinon
.stub()
.callsArgWith(1, null, null)
return this.DocArchiveManager.unArchiveAllDocs(this.project_id, (err) => {
should.exist(err)
return done()
})
it('should unarchive all inS3 docs', async function () {
await DocArchiveManager.promises.unArchiveAllDocs(projectId)
for (const doc of archivedDocs) {
expect(PersistorManager.getObjectStream).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${doc._id}`
)
}
})
return it('should return the error', function (done) {
this.MongoManager.getArchivedProjectDocs = sinon
.stub()
.callsArgWith(1, this.error, null)
return this.DocArchiveManager.unArchiveAllDocs(this.project_id, (err) => {
err.should.equal(this.error)
return done()
})
it('should return error if the project has no docs', async function () {
MongoManager.promises.getArchivedProjectDocs.resolves(null)
await expect(
DocArchiveManager.promises.unArchiveAllDocs(projectId)
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
})
})
describe('destroyAllDocs', function () {
beforeEach(function () {
this.request.del = sinon
.stub()
.callsArgWith(1, null, { statusCode: 204 }, {})
this.MongoManager.getProjectsDocs = sinon
.stub()
.callsArgWith(3, null, this.mixedDocs)
this.MongoManager.findDoc = sinon.stub().callsArgWith(3, null, null)
this.MongoManager.destroyDoc = sinon.stub().yields()
return Array.from(this.mixedDocs).map((doc) =>
this.MongoManager.findDoc
.withArgs(this.project_id, doc._id)
.callsArgWith(3, null, doc)
)
it('should resolve with valid arguments', async function () {
await expect(DocArchiveManager.promises.destroyAllDocs(projectId)).to
.eventually.be.fulfilled
})
it('should destroy all the docs', function (done) {
this.DocArchiveManager.destroyDoc = sinon.stub().callsArgWith(2, null)
return this.DocArchiveManager.destroyAllDocs(this.project_id, (err) => {
for (const doc of Array.from(this.mixedDocs)) {
this.DocArchiveManager.destroyDoc
.calledWith(this.project_id, doc._id)
.should.equal(true)
}
should.not.exist(err)
return done()
})
})
it('should delete all docs that are in s3 from s3', async function () {
await DocArchiveManager.promises.destroyAllDocs(projectId)
it('should only delete the s3 docs from s3', function (done) {
const docOpts = (doc) => {
return JSON.parse(
JSON.stringify({
aws: {
key: this.settings.docstore.s3.key,
secret: this.settings.docstore.s3.secret,
bucket: this.settings.docstore.s3.bucket
},
json: true,
timeout: 30 * 1000,
uri: `https://${this.settings.docstore.s3.bucket}.s3.amazonaws.com/${this.project_id}/${doc._id}`
})
// not inS3
for (const index of [0, 1, 4]) {
expect(PersistorManager.deleteObject).not.to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${mongoDocs[index]._id}`
)
}
return this.DocArchiveManager.destroyAllDocs(this.project_id, (err) => {
let doc
expect(err).not.to.exist
for (doc of Array.from(this.archivedDocs)) {
sinon.assert.calledWith(this.request.del, docOpts(doc))
}
for (doc of Array.from(this.unarchivedDocs)) {
expect(this.request.del.calledWith(docOpts(doc))).to.equal(false)
} // no notCalledWith
return done()
})
})
return it('should remove the docs from mongo', function (done) {
this.DocArchiveManager.destroyAllDocs(this.project_id, (err) => {
return expect(err).not.to.exist
})
for (const doc of Array.from(this.mixedDocs)) {
sinon.assert.calledWith(this.MongoManager.destroyDoc, doc._id)
// inS3
for (const index of [2, 3]) {
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${mongoDocs[index]._id}`
)
}
return done()
})
})
describe('_s3DocToMongoDoc', function () {
describe('with the old schema', function () {
return it('should return the docs lines', function (done) {
return this.DocArchiveManager._s3DocToMongoDoc(
['doc', 'lines'],
(error, doc) => {
expect(doc).to.deep.equal({
lines: ['doc', 'lines']
})
return done()
}
)
})
})
describe('with the new schema', function () {
it('should return the doc lines and ranges', function (done) {
this.RangeManager.jsonRangesToMongo = sinon
.stub()
.returns({ mongo: 'ranges' })
return this.DocArchiveManager._s3DocToMongoDoc(
{
lines: ['doc', 'lines'],
ranges: { json: 'ranges' },
schema_v: 1
},
(error, doc) => {
expect(doc).to.deep.equal({
lines: ['doc', 'lines'],
ranges: { mongo: 'ranges' }
})
return done()
}
it('should destroy all docs in mongo', async function () {
await DocArchiveManager.promises.destroyAllDocs(projectId)
for (const mongoDoc of mongoDocs) {
expect(MongoManager.promises.destroyDoc).to.have.been.calledWith(
mongoDoc._id
)
})
return it('should return just the doc lines when there are no ranges', function (done) {
return this.DocArchiveManager._s3DocToMongoDoc(
{
lines: ['doc', 'lines'],
schema_v: 1
},
(error, doc) => {
expect(doc).to.deep.equal({
lines: ['doc', 'lines']
})
return done()
}
)
})
})
return describe('with an unrecognised schema', function () {
return it('should return an error', function (done) {
return this.DocArchiveManager._s3DocToMongoDoc(
{
schema_v: 2
},
(error, doc) => {
expect(error).to.exist
return done()
}
)
})
})
})
return describe('_mongoDocToS3Doc', function () {
describe('with a valid doc', function () {
return it('should return the json version', function (done) {
let doc
return this.DocArchiveManager._mongoDocToS3Doc(
(doc = {
lines: ['doc', 'lines'],
ranges: { mock: 'ranges' }
}),
(err, s3_doc) => {
expect(s3_doc).to.equal(
JSON.stringify({
lines: ['doc', 'lines'],
ranges: { mock: 'ranges' },
schema_v: 1
})
)
return done()
}
)
})
})
describe('with null bytes in the result', function () {
beforeEach(function () {
this._stringify = JSON.stringify
return (JSON.stringify = sinon.stub().returns('{"bad": "\u0000"}'))
})
afterEach(function () {
return (JSON.stringify = this._stringify)
})
return it('should return an error', function (done) {
return this.DocArchiveManager._mongoDocToS3Doc(
{
lines: ['doc', 'lines'],
ranges: { mock: 'ranges' }
},
(err, s3_doc) => {
expect(err).to.exist
return done()
}
)
})
})
return describe('without doc lines', function () {
return it('should return an error', function (done) {
return this.DocArchiveManager._mongoDocToS3Doc({}, (err, s3_doc) => {
expect(err).to.exist
return done()
})
})
}
})
})
})

View file

@@ -38,7 +38,8 @@ describe('DocManager', function () {
log: sinon.stub(),
warn() {},
err() {}
})
}),
'./Errors': Errors
}
})
this.doc_id = ObjectId().toString()

View file

@@ -29,6 +29,9 @@ describe('MongoManager', function () {
},
'metrics-sharelatex': { timeAsyncMethod: sinon.stub() },
'logger-sharelatex': { log() {} }
},
globals: {
console
}
})
this.project_id = ObjectId().toString()