From 7292d93dab43d1793d1dbb18bbb3aa10b78bc62e Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 10:38:32 +0000 Subject: [PATCH 01/31] Add fake GCS server, and fix health checks --- services/filestore/docker-compose.ci.yml | 29 ++++++++++++++----- services/filestore/docker-compose.yml | 29 ++++++++++++++----- .../test/acceptance/deps/Dockerfile.fake-gcs | 5 ++++ .../test/acceptance/deps/Dockerfile.s3mock | 4 +++ .../test/acceptance/deps/healthcheck.sh | 9 ++++++ 5 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 services/filestore/test/acceptance/deps/Dockerfile.fake-gcs create mode 100644 services/filestore/test/acceptance/deps/Dockerfile.s3mock create mode 100755 services/filestore/test/acceptance/deps/healthcheck.sh diff --git a/services/filestore/docker-compose.ci.yml b/services/filestore/docker-compose.ci.yml index 824ba815c0..fe4eaa35fd 100644 --- a/services/filestore/docker-compose.ci.yml +++ b/services/filestore/docker-compose.ci.yml @@ -22,10 +22,6 @@ services: REDIS_HOST: redis MONGO_HOST: mongo POSTGRES_HOST: postgres - AWS_S3_ENDPOINT: http://s3:9090 - AWS_S3_PATH_STYLE: 'true' - AWS_ACCESS_KEY_ID: fake - AWS_SECRET_ACCESS_KEY: fake MOCHA_GREP: ${MOCHA_GREP} NODE_ENV: test ENABLE_CONVERSIONS: "true" @@ -33,9 +29,21 @@ services: AWS_S3_USER_FILES_BUCKET_NAME: fake_user_files AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake_template_files AWS_S3_PUBLIC_FILES_BUCKET_NAME: fake_public_files + AWS_S3_ENDPOINT: http://s3:9090 + AWS_ACCESS_KEY_ID: fake + AWS_SECRET_ACCESS_KEY: fake + AWS_S3_PATH_STYLE: 'true' + GCS_API_ENDPOINT: gcs:9090 + GCS_USER_FILES_BUCKET_NAME: fake_userfiles + GCS_TEMPLATE_FILES_BUCKET_NAME: fake_templatefiles + GCS_PUBLIC_FILES_BUCKET_NAME: fake_publicfiles + NODE_TLS_REJECT_UNAUTHORIZED: 0 + STORAGE_EMULATOR_HOST: https://gcs:9090/storage/v1 depends_on: s3: condition: service_healthy + gcs: + condition: service_healthy user: node command: npm run test:acceptance:_run @@ -48,8 +56,13 @@ services: command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs . 
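# note (an editorial aside, matching the Dockerfiles added below): the s3 and
# gcs mocks are built from local Dockerfiles rather than pulled as plain images
# so that a HEALTHCHECK can be baked into the image; docker-compose only
# honours `condition: service_healthy` when a healthcheck is defined.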
user: root s3: - image: adobe/s3mock + build: + context: test/acceptance/deps + dockerfile: Dockerfile.s3mock environment: - - initialBuckets=fake_user_files,fake_template_files,fake_public_files,bucket - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9090"] + - initialBuckets=fake_user_files,fake_template_files,fake_public_files + + gcs: + build: + context: test/acceptance/deps + dockerfile: Dockerfile.fake-gcs diff --git a/services/filestore/docker-compose.yml b/services/filestore/docker-compose.yml index c2634432ba..d904574f84 100644 --- a/services/filestore/docker-compose.yml +++ b/services/filestore/docker-compose.yml @@ -31,10 +31,6 @@ services: REDIS_HOST: redis MONGO_HOST: mongo POSTGRES_HOST: postgres - AWS_S3_ENDPOINT: http://s3:9090 - AWS_S3_PATH_STYLE: 'true' - AWS_ACCESS_KEY_ID: fake - AWS_SECRET_ACCESS_KEY: fake MOCHA_GREP: ${MOCHA_GREP} LOG_LEVEL: ERROR NODE_ENV: test @@ -43,15 +39,32 @@ services: AWS_S3_USER_FILES_BUCKET_NAME: fake_user_files AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake_template_files AWS_S3_PUBLIC_FILES_BUCKET_NAME: fake_public_files + AWS_S3_ENDPOINT: http://s3:9090 + AWS_S3_PATH_STYLE: 'true' + AWS_ACCESS_KEY_ID: fake + AWS_SECRET_ACCESS_KEY: fake + GCS_API_ENDPOINT: gcs:9090 + GCS_USER_FILES_BUCKET_NAME: fake_userfiles + GCS_TEMPLATE_FILES_BUCKET_NAME: fake_templatefiles + GCS_PUBLIC_FILES_BUCKET_NAME: fake_publicfiles + NODE_TLS_REJECT_UNAUTHORIZED: 0 + STORAGE_EMULATOR_HOST: https://gcs:9090/storage/v1 user: node depends_on: s3: condition: service_healthy + gcs: + condition: service_healthy command: npm run test:acceptance s3: - image: adobe/s3mock + build: + context: test/acceptance/deps + dockerfile: Dockerfile.s3mock environment: - - initialBuckets=fake_user_files,fake_template_files,fake_public_files,bucket - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9090"] + - initialBuckets=fake_user_files,fake_template_files,fake_public_files + + gcs: + build: + context: test/acceptance/deps + dockerfile: Dockerfile.fake-gcs diff --git a/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs new file mode 100644 index 0000000000..694bcdac9e --- /dev/null +++ b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs @@ -0,0 +1,5 @@ +FROM gh2k/fake-gcs-server +RUN apk add --update --no-cache curl +COPY healthcheck.sh /healthcheck.sh +HEALTHCHECK --interval=1s --timeout=1s --retries=30 CMD /healthcheck.sh http://localhost:9090 +CMD ["--port=9090"] diff --git a/services/filestore/test/acceptance/deps/Dockerfile.s3mock b/services/filestore/test/acceptance/deps/Dockerfile.s3mock new file mode 100644 index 0000000000..15eda4dd4b --- /dev/null +++ b/services/filestore/test/acceptance/deps/Dockerfile.s3mock @@ -0,0 +1,4 @@ +FROM adobe/s3mock +RUN apk add --update --no-cache curl +COPY healthcheck.sh /healthcheck.sh +HEALTHCHECK --interval=1s --timeout=1s --retries=30 CMD /healthcheck.sh http://localhost:9090 diff --git a/services/filestore/test/acceptance/deps/healthcheck.sh b/services/filestore/test/acceptance/deps/healthcheck.sh new file mode 100755 index 0000000000..cd19cea637 --- /dev/null +++ b/services/filestore/test/acceptance/deps/healthcheck.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# health check to allow 404 status code as valid +STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" $1) +# will be 000 on non-http error (e.g. 
connection failure) +if test $STATUSCODE -ge 500 || test $STATUSCODE -lt 200; then + exit 1 +fi +exit 0 From e6cf0687a9c042e399ebc981458e36d98a33bfeb Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 10:39:00 +0000 Subject: [PATCH 02/31] Add gcs client library --- services/filestore/package-lock.json | 205 +++++++++++++++++++++++++-- services/filestore/package.json | 1 + 2 files changed, 197 insertions(+), 9 deletions(-) diff --git a/services/filestore/package-lock.json b/services/filestore/package-lock.json index de8452d061..1d74c5d172 100644 --- a/services/filestore/package-lock.json +++ b/services/filestore/package-lock.json @@ -603,6 +603,52 @@ "resolved": "https://registry.npmjs.org/@google-cloud/promisify/-/promisify-1.0.4.tgz", "integrity": "sha512-VccZDcOql77obTnFh0TbNED/6ZbbmHDf8UMNnzO1d5g9V0Htfm4k5cllY8P1tJsRKC3zWYGRLaViiupcgVjBoQ==" }, + "@google-cloud/storage": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/@google-cloud/storage/-/storage-4.4.0.tgz", + "integrity": "sha512-R64ey4dLIG3IgiKw0CL5MdZ4ZtZdGhN75171vjiL+ioZG+hlLFkjsrCTRuIdE35v42nNe5nXmVhBHQQTuPozHA==", + "requires": { + "@google-cloud/common": "^2.1.1", + "@google-cloud/paginator": "^2.0.0", + "@google-cloud/promisify": "^1.0.0", + "arrify": "^2.0.0", + "compressible": "^2.0.12", + "concat-stream": "^2.0.0", + "date-and-time": "^0.12.0", + "duplexify": "^3.5.0", + "extend": "^3.0.2", + "gaxios": "^2.0.1", + "gcs-resumable-upload": "^2.2.4", + "hash-stream-validation": "^0.2.2", + "mime": "^2.2.0", + "mime-types": "^2.0.8", + "onetime": "^5.1.0", + "p-limit": "^2.2.0", + "pumpify": "^2.0.0", + "readable-stream": "^3.4.0", + "snakeize": "^0.1.0", + "stream-events": "^1.0.1", + "through2": "^3.0.0", + "xdg-basedir": "^4.0.0" + }, + "dependencies": { + "mime": { + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.4.tgz", + "integrity": "sha512-LRxmNwziLPT828z+4YkNzloCFC2YM4wrB99k+AV5ZbEyfGNWfG8SO1FUXLmLDBSo89NrJZ4DIWeLjy1CHGhMGA==" + }, + "readable-stream": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", + "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, "@google-cloud/trace-agent": { "version": "3.6.1", "resolved": "https://registry.npmjs.org/@google-cloud/trace-agent/-/trace-agent-3.6.1.tgz", @@ -1422,6 +1468,11 @@ "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==" }, + "buffer-from": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" + }, "builtin-modules": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.1.0.tgz", @@ -1608,11 +1659,55 @@ "integrity": "sha512-6P6g0uetGpW/sdyUy/iQQCbFF0kWVMSIVSyYz7Zgjcgh8mgw8PQzDNZeyZ5DQ2gM7LBoZPHmnjz8rUthkBG5tw==", "dev": true }, + "compressible": { + "version": "2.0.18", + "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz", + "integrity": "sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==", + "requires": { + "mime-db": ">= 1.43.0 
< 2" + } + }, "concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" }, + "concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "requires": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + }, + "dependencies": { + "readable-stream": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", + "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, + "configstore": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/configstore/-/configstore-5.0.1.tgz", + "integrity": "sha512-aMKprgk5YhBNyH25hj8wGt2+D52Sw1DRRIzqBwLp2Ya9mFmY8KPvvtvmna8SxVR9JMZ4kzMD68N22vlaRpkeFA==", + "requires": { + "dot-prop": "^5.2.0", + "graceful-fs": "^4.1.2", + "make-dir": "^3.0.0", + "unique-string": "^2.0.0", + "write-file-atomic": "^3.0.0", + "xdg-basedir": "^4.0.0" + } + }, "console-log-level": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/console-log-level/-/console-log-level-1.4.1.tgz", @@ -1688,6 +1783,11 @@ } } }, + "crypto-random-string": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/crypto-random-string/-/crypto-random-string-2.0.0.tgz", + "integrity": "sha512-v1plID3y9r/lPhviJ1wrXpLeyUIGAZ2SHNYTEapm7/8A9nLPoyvVp3RK/EPFqn5kEznyWgYZNsRtYYIWbuG8KA==" + }, "d64": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/d64/-/d64-1.0.0.tgz", @@ -1701,6 +1801,11 @@ "assert-plus": "^1.0.0" } }, + "date-and-time": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/date-and-time/-/date-and-time-0.12.0.tgz", + "integrity": "sha512-n2RJIAp93AucgF/U/Rz5WRS2Hjg5Z+QxscaaMCi6pVZT1JpJKRH+C08vyH/lRR1kxNXnPxgo3lWfd+jCb/UcuQ==" + }, "debug": { "version": "2.6.9", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", @@ -2525,6 +2630,19 @@ "json-bigint": "^0.3.0" } }, + "gcs-resumable-upload": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/gcs-resumable-upload/-/gcs-resumable-upload-2.3.2.tgz", + "integrity": "sha512-OPS0iAmPCV+r7PziOIhyxmQOzsazFCy76yYDOS/Z80O/7cuny1KMfqDQa2T0jLaL8EreTU7EMZG5pUuqBKgzHA==", + "requires": { + "abort-controller": "^3.0.0", + "configstore": "^5.0.0", + "gaxios": "^2.0.0", + "google-auth-library": "^5.0.0", + "pumpify": "^2.0.0", + "stream-events": "^1.0.4" + } + }, "get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -2628,8 +2746,7 @@ "graceful-fs": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.3.tgz", - "integrity": "sha512-a30VEBm4PEdx1dRB7MFK7BejejvCvBronbLjht+sHuGYj8PHs7M/5Z+rt5lw551vZ7yfTCj4Vuyy3mSJytDWRQ==", - "dev": true + "integrity": "sha512-a30VEBm4PEdx1dRB7MFK7BejejvCvBronbLjht+sHuGYj8PHs7M/5Z+rt5lw551vZ7yfTCj4Vuyy3mSJytDWRQ==" }, "growl": { "version": "1.10.5", @@ -2707,6 +2824,25 @@ "integrity": "sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==", "dev": true }, + "hash-stream-validation": 
{ + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/hash-stream-validation/-/hash-stream-validation-0.2.2.tgz", + "integrity": "sha512-cMlva5CxWZOrlS/cY0C+9qAzesn5srhFA8IT1VPiHc9bWWBLkJfEUIZr7MWoi89oOOGmpg8ymchaOjiArsGu5A==", + "requires": { + "through2": "^2.0.0" + }, + "dependencies": { + "through2": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + "integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", + "requires": { + "readable-stream": "~2.3.6", + "xtend": "~4.0.1" + } + } + } + }, "he": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz", @@ -2827,8 +2963,7 @@ "imurmurhash": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", - "dev": true + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==" }, "indent-string": { "version": "4.0.0", @@ -3330,6 +3465,14 @@ "statsd-parser": "~0.0.4" } }, + "make-dir": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.0.2.tgz", + "integrity": "sha512-rYKABKutXa6vXTXhoV18cBE7PaewPXHe/Bdq4v+ZLMhxbWApkFFplT0LcbMW+6BbjnQXzZ/sAvSE/JdguApG5w==", + "requires": { + "semver": "^6.0.0" + } + }, "make-plural": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/make-plural/-/make-plural-4.3.0.tgz", @@ -3432,8 +3575,7 @@ "mimic-fn": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", - "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", - "dev": true + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==" }, "minimatch": { "version": "3.0.4", @@ -3720,7 +3862,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.0.tgz", "integrity": "sha512-5NcSkPHhwTVFIQN+TUqXoS5+dlElHXdpAWu9I0HP20YOtIi+aZ0Ct82jdlILDxjLEAWwvm+qj1m6aEtsDVmm6Q==", - "dev": true, "requires": { "mimic-fn": "^2.1.0" } @@ -4990,8 +5131,7 @@ "signal-exit": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz", - "integrity": "sha512-meQNNykwecVxdu1RlYMKpQx4+wefIYpmxi6gexo/KAbwquJrBUrBmKYJrE8KFkVQAAVWEnwNdu21PgrD77J3xA==", - "dev": true + "integrity": "sha512-meQNNykwecVxdu1RlYMKpQx4+wefIYpmxi6gexo/KAbwquJrBUrBmKYJrE8KFkVQAAVWEnwNdu21PgrD77J3xA==" }, "sinon": { "version": "7.1.1", @@ -5055,6 +5195,11 @@ "to-snake-case": "^1.0.0" } }, + "snakeize": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/snakeize/-/snakeize-0.1.0.tgz", + "integrity": "sha1-EMCI2LWOsHazIpu1oE4jLOEmQi0=" + }, "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -5455,6 +5600,19 @@ "mime-types": "~2.1.24" } }, + "typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" + }, + "typedarray-to-buffer": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz", + "integrity": "sha512-zdu8XMNEDepKKR+XYOXAVPtWui0ly0NtohUscw+UmaHiAWT8hrV1rr//H6V+0DvJ3OQ19S979M0laLfX8rm82Q==", + "requires": { + "is-typedarray": "^1.0.0" + } + }, "typescript": { 
"version": "3.8.2", "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.8.2.tgz", @@ -5466,6 +5624,14 @@ "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.6.0.tgz", "integrity": "sha512-z4o1fvKUojIWh9XuaVLUDdf86RQiq13AC1dmHbTpoyuu+bquHms76v16CjycCbec87J7z0k//SiQVk0sMdFmpQ==" }, + "unique-string": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/unique-string/-/unique-string-2.0.0.tgz", + "integrity": "sha512-uNaeirEPvpZWSgzwsPGtU2zVSTrn/8L5q/IexZmH0eH6SA73CmAA5U4GwORTxQAZs95TAXLNqeLoPPNO5gZfWg==", + "requires": { + "crypto-random-string": "^2.0.0" + } + }, "unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", @@ -5686,6 +5852,22 @@ "mkdirp": "^0.5.1" } }, + "write-file-atomic": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz", + "integrity": "sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q==", + "requires": { + "imurmurhash": "^0.1.4", + "is-typedarray": "^1.0.0", + "signal-exit": "^3.0.2", + "typedarray-to-buffer": "^3.1.5" + } + }, + "xdg-basedir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/xdg-basedir/-/xdg-basedir-4.0.0.tgz", + "integrity": "sha512-PSNhEJDejZYV7h50BohL09Er9VaIefr2LMAf3OEmpCkjOi34eYyQYAXUTjEQtZJTKcF0E2UKTh+osDLsgNim9Q==" + }, "xml2js": { "version": "0.4.19", "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.19.tgz", @@ -5700,6 +5882,11 @@ "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz", "integrity": "sha512-7YXTQc3P2l9+0rjaUbLwMKRhtmwg1M1eDf6nag7urC7pIPYLD9W/jmzQ4ptRSUbodw5S0jfoGTflLemQibSpeQ==" }, + "xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==" + }, "y18n": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index ca56581131..6f7d84c778 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -20,6 +20,7 @@ "test:unit:_run": "mocha --recursive --reporter spec $@ test/unit/js" }, "dependencies": { + "@google-cloud/storage": "^4.3.0", "@overleaf/o-error": "^2.1.0", "aws-sdk": "^2.628.0", "body-parser": "^1.2.0", From 366ce97169addea6cc65b34a7cb24cf8886897b8 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 11:00:45 +0000 Subject: [PATCH 03/31] Add GCS Persistor --- services/filestore/app/js/GcsPersistor.js | 295 ++++++++++++++++++ services/filestore/app/js/PersistorManager.js | 2 + 2 files changed, 297 insertions(+) create mode 100644 services/filestore/app/js/GcsPersistor.js diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js new file mode 100644 index 0000000000..350b0d451c --- /dev/null +++ b/services/filestore/app/js/GcsPersistor.js @@ -0,0 +1,295 @@ +const settings = require('settings-sharelatex') +const metrics = require('metrics-sharelatex') +const fs = require('fs') +const { promisify } = require('util') +const Stream = require('stream') +const { Storage } = require('@google-cloud/storage') +const { callbackify } = require('util') +const { WriteError, ReadError, NotFoundError } = require('./Errors') +const PersistorHelper = require('./PersistorHelper') + +const pipeline = promisify(Stream.pipeline) + +function base64ToHex(base64) { 
+ return Buffer.from(base64, 'base64').toString('hex') +} + +// both of these settings will be null by default except for tests +// that's OK - GCS uses the locally-configured service account by default +const storage = new Storage(settings.filestore.gcs) +// workaround for broken uploads with custom endpoints: +// https://github.com/googleapis/nodejs-storage/issues/898 +if (settings.filestore.gcs.apiEndpoint) { + storage.interceptors.push({ + request: function(reqOpts) { + const url = new URL(reqOpts.uri) + url.host = settings.filestore.gcs.apiEndpoint + reqOpts.uri = url.toString() + return reqOpts + } + }) +} + +const GcsPersistor = { + sendFile: callbackify(sendFile), + sendStream: callbackify(sendStream), + getFileStream: callbackify(getFileStream), + getFileMd5Hash: callbackify(getFileMd5Hash), + deleteDirectory: callbackify(deleteDirectory), + getFileSize: callbackify(getFileSize), + deleteFile: callbackify(deleteFile), + copyFile: callbackify(copyFile), + checkIfFileExists: callbackify(checkIfFileExists), + directorySize: callbackify(directorySize), + promises: { + sendFile, + sendStream, + getFileStream, + getFileMd5Hash, + deleteDirectory, + getFileSize, + deleteFile, + copyFile, + checkIfFileExists, + directorySize + } +} + +module.exports = GcsPersistor + +async function sendFile(bucket, key, fsPath) { + let readStream + try { + readStream = fs.createReadStream(fsPath) + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error reading file from disk', + { bucketName: bucket, key, fsPath }, + ReadError + ) + } + return sendStream(bucket, key, readStream) +} + +async function sendStream(bucket, key, readStream, sourceMd5) { + try { + let hashPromise + + // if there is no supplied md5 hash, we calculate the hash as the data passes through + if (!sourceMd5) { + hashPromise = PersistorHelper.calculateStreamMd5(readStream) + } + + const meteredStream = PersistorHelper.getMeteredStream( + readStream, + (_, byteCount) => { + metrics.count('gcs.egress', byteCount) + } + ) + + const writeOptions = { + resumable: false // recommended by Google + } + + if (sourceMd5) { + writeOptions.validation = 'md5' + writeOptions.metadata = { + md5Hash: sourceMd5 + } + } + + const uploadStream = storage + .bucket(bucket) + .file(key) + .createWriteStream(writeOptions) + + await pipeline(meteredStream, uploadStream) + + // if we didn't have an md5 hash, we should compare our computed one with Google's + // as we couldn't tell GCS about it beforehand + if (hashPromise) { + sourceMd5 = await hashPromise + // throws on mismatch + await PersistorHelper.verifyMd5(GcsPersistor, bucket, key, sourceMd5) + } + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'upload to GCS failed', + { bucket, key }, + WriteError + ) + } +} + +async function getFileStream(bucket, key, opts) { + if (opts.end) { + // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1 + opts.end++ + } + const stream = storage + .bucket(bucket) + .file(key) + .createReadStream(opts) + + const meteredStream = PersistorHelper.getMeteredStream(stream, (_, bytes) => { + // ignore the error parameter and just log the byte count + metrics.count('gcs.ingress', bytes) + }) + + try { + await PersistorHelper.waitForStreamReady(stream) + return meteredStream + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error reading file from GCS', + { bucket, key, opts }, + ReadError + ) + } +} + +async function getFileSize(bucket, key) { + try { + const metadata = await storage + .bucket(bucket) + .file(key) + 
.getMetadata() + return metadata[0].size + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error getting size of GCS object', + { bucket, key }, + ReadError + ) + } +} + +async function getFileMd5Hash(bucket, key) { + try { + const metadata = await storage + .bucket(bucket) + .file(key) + .getMetadata() + return base64ToHex(metadata[0].md5Hash) + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error getting hash of GCS object', + { bucket, key }, + ReadError + ) + } +} + +async function deleteFile(bucket, key) { + try { + await storage + .bucket(bucket) + .file(key) + .delete() + } catch (err) { + const error = PersistorHelper.wrapError( + err, + 'error deleting GCS object', + { bucket, key }, + WriteError + ) + if (!(error instanceof NotFoundError)) { + throw error + } + } +} + +async function deleteDirectory(bucket, key) { + let files + + try { + const response = await storage.bucket(bucket).getFiles({ directory: key }) + files = response[0] + } catch (err) { + const error = PersistorHelper.wrapError( + err, + 'failed to list objects in GCS', + { bucket, key }, + ReadError + ) + if (error instanceof NotFoundError) { + return + } + throw error + } + + for (const index in files) { + try { + await files[index].delete() + } catch (err) { + const error = PersistorHelper.wrapError( + err, + 'failed to delete object in GCS', + { bucket, key }, + WriteError + ) + if (!(error instanceof NotFoundError)) { + throw error + } + } + } +} + +async function directorySize(bucket, key) { + let files + + try { + const response = await storage.bucket(bucket).getFiles({ directory: key }) + files = response[0] + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'failed to list objects in GCS', + { bucket, key }, + ReadError + ) + } + + return files.reduce((acc, file) => Number(file.metadata.size) + acc, 0) +} + +async function checkIfFileExists(bucket, key) { + try { + const response = await storage + .bucket(bucket) + .file(key) + .exists() + return response[0] + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error checking if file exists in GCS', + { bucket, key }, + ReadError + ) + } +} + +async function copyFile(bucket, sourceKey, destKey) { + try { + const src = storage.bucket(bucket).file(sourceKey) + const dest = storage.bucket(bucket).file(destKey) + await src.copy(dest) + } catch (err) { + // fake-gcs-server has a bug that returns an invalid response when the file does not exist + if (err.message === 'Cannot parse response as JSON: not found\n') { + err.code = 404 + } + throw PersistorHelper.wrapError( + err, + 'failed to copy file in GCS', + { bucket, sourceKey, destKey }, + WriteError + ) + } +} diff --git a/services/filestore/app/js/PersistorManager.js b/services/filestore/app/js/PersistorManager.js index 32f6cd41f8..d26ab77a92 100644 --- a/services/filestore/app/js/PersistorManager.js +++ b/services/filestore/app/js/PersistorManager.js @@ -19,6 +19,8 @@ function getPersistor(backend) { return require('./S3Persistor') case 'fs': return require('./FSPersistor') + case 'gcs': + return require('./GcsPersistor') default: throw new Error(`unknown filestore backend: ${backend}`) } From 2cfab8d3137093afc58c4abc5582106fcf1f2400 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 11:01:51 +0000 Subject: [PATCH 04/31] Add GCS-specific acceptance tests --- .../test/acceptance/js/FilestoreTests.js | 136 ++++++------------ .../test/acceptance/js/TestConfig.js | 107 ++++++++++++++ 2 files changed, 149 insertions(+), 94 deletions(-) create mode 
100644 services/filestore/test/acceptance/js/TestConfig.js diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index c6a1e08444..599d60155a 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -11,6 +11,7 @@ const S3 = require('aws-sdk/clients/s3') const Stream = require('stream') const request = require('request') const { promisify } = require('util') +const { Storage } = require('@google-cloud/storage') const streamifier = require('streamifier') chai.use(require('chai-as-promised')) @@ -42,89 +43,7 @@ function streamToString(stream) { // store settings for multiple backends, so that we can test each one. // fs will always be available - add others if they are configured -const BackendSettings = { - FSPersistor: { - backend: 'fs', - stores: { - user_files: Path.resolve(__dirname, '../../../user_files'), - public_files: Path.resolve(__dirname, '../../../public_files'), - template_files: Path.resolve(__dirname, '../../../template_files') - } - }, - S3Persistor: { - backend: 's3', - s3: { - key: process.env.AWS_ACCESS_KEY_ID, - secret: process.env.AWS_SECRET_ACCESS_KEY, - endpoint: process.env.AWS_S3_ENDPOINT, - pathStyle: true, - partSize: 100 * 1024 * 1024 - }, - stores: { - user_files: process.env.AWS_S3_USER_FILES_BUCKET_NAME, - template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, - public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME - } - }, - FallbackS3ToFSPersistor: { - backend: 's3', - s3: { - key: process.env.AWS_ACCESS_KEY_ID, - secret: process.env.AWS_SECRET_ACCESS_KEY, - endpoint: process.env.AWS_S3_ENDPOINT, - pathStyle: true, - partSize: 100 * 1024 * 1024 - }, - stores: { - user_files: process.env.AWS_S3_USER_FILES_BUCKET_NAME, - template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, - public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME - }, - fallback: { - backend: 'fs', - buckets: { - [process.env.AWS_S3_USER_FILES_BUCKET_NAME]: Path.resolve( - __dirname, - '../../../user_files' - ), - [process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME]: Path.resolve( - __dirname, - '../../../public_files' - ), - [process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME]: Path.resolve( - __dirname, - '../../../template_files' - ) - } - } - }, - FallbackFSToS3Persistor: { - backend: 'fs', - s3: { - key: process.env.AWS_ACCESS_KEY_ID, - secret: process.env.AWS_SECRET_ACCESS_KEY, - endpoint: process.env.AWS_S3_ENDPOINT, - pathStyle: true, - partSize: 100 * 1024 * 1024 - }, - stores: { - user_files: Path.resolve(__dirname, '../../../user_files'), - public_files: Path.resolve(__dirname, '../../../public_files'), - template_files: Path.resolve(__dirname, '../../../template_files') - }, - fallback: { - backend: 's3', - buckets: { - [Path.resolve(__dirname, '../../../user_files')]: process.env - .AWS_S3_USER_FILES_BUCKET_NAME, - [Path.resolve(__dirname, '../../../public_files')]: process.env - .AWS_S3_PUBLIC_FILES_BUCKET_NAME, - [Path.resolve(__dirname, '../../../template_files')]: process.env - .AWS_S3_TEMPLATE_FILES_BUCKET_NAME - } - } - } -} +const BackendSettings = require('./TestConfig') describe('Filestore', function() { this.timeout(1000 * 10) @@ -134,7 +53,7 @@ describe('Filestore', function() { // redefine the test suite for every available backend Object.keys(BackendSettings).forEach(backend => { describe(backend, function() { - let app, previousEgress, previousIngress, projectId + let app, previousEgress, previousIngress, 
metricPrefix, projectId before(async function() { // create the app with the relevant filestore settings @@ -143,13 +62,27 @@ describe('Filestore', function() { await app.runServer() }) + if (BackendSettings[backend].gcs) { + before(async function() { + const storage = new Storage(Settings.filestore.gcs) + await storage.createBucket(process.env.GCS_USER_FILES_BUCKET_NAME) + await storage.createBucket(process.env.GCS_PUBLIC_FILES_BUCKET_NAME) + await storage.createBucket(process.env.GCS_TEMPLATE_FILES_BUCKET_NAME) + }) + } + after(async function() { return app.stop() }) beforeEach(async function() { - if (Settings.filestore.backend === 's3') { - previousEgress = await getMetric(filestoreUrl, 's3_egress') + // retrieve previous metrics from the app + if (['s3', 'gcs'].includes(Settings.filestore.backend)) { + metricPrefix = Settings.filestore.backend + previousEgress = await getMetric( + filestoreUrl, + `${metricPrefix}_egress` + ) } projectId = `acceptance_tests_${Math.random()}` }) @@ -195,8 +128,11 @@ describe('Filestore', function() { // The upload request can bump the ingress metric. // The content hash validation might require a full download // in case the ETag field of the upload response is not a md5 sum. - if (Settings.filestore.backend === 's3') { - previousIngress = await getMetric(filestoreUrl, 's3_ingress') + if (['s3', 'gcs'].includes(Settings.filestore.backend)) { + previousIngress = await getMetric( + filestoreUrl, + `${metricPrefix}_ingress` + ) } }) @@ -285,15 +221,21 @@ describe('Filestore', function() { expect(response.body).to.equal(newContent) }) - if (backend === 'S3Persistor') { + if (['S3Persistor', 'GcsPersistor'].includes(backend)) { it('should record an egress metric for the upload', async function() { - const metric = await getMetric(filestoreUrl, 's3_egress') + const metric = await getMetric( + filestoreUrl, + `${metricPrefix}_egress` + ) expect(metric - previousEgress).to.equal(constantFileContent.length) }) it('should record an ingress metric when downloading the file', async function() { await rp.get(fileUrl) - const metric = await getMetric(filestoreUrl, 's3_ingress') + const metric = await getMetric( + filestoreUrl, + `${metricPrefix}_ingress` + ) expect(metric - previousIngress).to.equal( constantFileContent.length ) @@ -307,7 +249,10 @@ describe('Filestore', function() { } } await rp.get(options) - const metric = await getMetric(filestoreUrl, 's3_ingress') + const metric = await getMetric( + filestoreUrl, + `${metricPrefix}_ingress` + ) expect(metric - previousIngress).to.equal(9) }) } @@ -827,9 +772,12 @@ describe('Filestore', function() { expect(response.body.substring(0, 8)).to.equal('%PDF-1.5') }) - if (backend === 'S3Persistor') { + if (['S3Persistor', 'GcsPersistor'].includes(backend)) { it('should record an egress metric for the upload', async function() { - const metric = await getMetric(filestoreUrl, 's3_egress') + const metric = await getMetric( + filestoreUrl, + `${metricPrefix}_egress` + ) expect(metric - previousEgress).to.equal(localFileSize) }) } diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js new file mode 100644 index 0000000000..e673ace71c --- /dev/null +++ b/services/filestore/test/acceptance/js/TestConfig.js @@ -0,0 +1,107 @@ +const Path = require('path') + +// use functions to get a fresh copy, not a reference, each time +function s3Config() { + return { + key: process.env.AWS_ACCESS_KEY_ID, + secret: process.env.AWS_SECRET_ACCESS_KEY, + endpoint: 
process.env.AWS_S3_ENDPOINT, + pathStyle: true, + partSize: 100 * 1024 * 1024 + } +} + +function s3Stores() { + return { + user_files: process.env.AWS_S3_USER_FILES_BUCKET_NAME, + template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, + public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME + } +} + +function gcsConfig() { + return { + apiEndpoint: process.env.GCS_API_ENDPOINT, + projectId: 'fake' + } +} + +function gcsStores() { + return { + user_files: process.env.GCS_USER_FILES_BUCKET_NAME, + template_files: process.env.GCS_TEMPLATE_FILES_BUCKET_NAME, + public_files: process.env.GCS_PUBLIC_FILES_BUCKET_NAME + } +} + +function fsStores() { + return { + user_files: Path.resolve(__dirname, '../../../user_files'), + public_files: Path.resolve(__dirname, '../../../public_files'), + template_files: Path.resolve(__dirname, '../../../template_files') + } +} + +function fallbackStores(primaryConfig, fallbackConfig) { + return { + [primaryConfig.user_files]: fallbackConfig.user_files, + [primaryConfig.public_files]: fallbackConfig.public_files, + [primaryConfig.template_files]: fallbackConfig.template_files + } +} + +module.exports = { + FSPersistor: { + backend: 'fs', + stores: fsStores() + }, + S3Persistor: { + backend: 's3', + s3: s3Config(), + stores: s3Stores() + }, + GcsPersistor: { + backend: 'gcs', + gcs: gcsConfig(), + stores: gcsStores() + }, + FallbackS3ToFSPersistor: { + backend: 's3', + s3: s3Config(), + stores: s3Stores(), + fallback: { + backend: 'fs', + buckets: fallbackStores(s3Stores(), fsStores()) + } + }, + FallbackFSToS3Persistor: { + backend: 'fs', + s3: s3Config(), + stores: fsStores(), + fallback: { + backend: 's3', + buckets: fallbackStores(fsStores(), s3Stores()) + } + }, + FallbackGcsToS3Persistor: { + backend: 'gcs', + gcs: gcsConfig(), + stores: gcsStores(), + s3: s3Config(), + fallback: { + backend: 's3', + buckets: fallbackStores(gcsStores(), s3Stores()) + } + }, + FallbackS3ToGcsPersistor: { + backend: 's3', + // can use the same bucket names for gcs and s3 (in tests) + stores: s3Stores(), + s3: s3Config(), + gcs: gcsConfig(), + fallback: { + backend: 'gcs', + buckets: fallbackStores(s3Stores(), gcsStores()) + } + } +} From 9dddf2520992c0704a7279c990d80f92094cdcd9 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 11:02:05 +0000 Subject: [PATCH 05/31] Add note on gcs config to config file --- services/filestore/config/settings.defaults.coffee | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 251fb073b4..6ffe5a8523 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -31,6 +31,7 @@ settings = # Choices are # s3 - Amazon S3 # fs - local filesystem + # gcs - Google Cloud Storage backend: process.env['BACKEND'] s3: @@ -41,6 +42,9 @@ settings = pathStyle: process.env['AWS_S3_PATH_STYLE'] partSize: process.env['AWS_S3_PARTSIZE'] or (100 * 1024 * 1024) + # GCS should be configured by the service account on the kubernetes pod. See GOOGLE_APPLICATION_CREDENTIALS, + # which will be picked up automatically. 
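+  # For local testing the client can instead be pointed at a fake GCS server,
+  # e.g. (a sketch matching the acceptance-test config; values are examples):
+  #   gcs:
+  #     apiEndpoint: process.env['GCS_API_ENDPOINT'] # e.g. 'gcs:9090'
+  #     projectId: 'fake'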
+ stores: user_files: process.env['USER_FILES_BUCKET_NAME'] template_files: process.env['TEMPLATE_FILES_BUCKET_NAME'] From e2f3dd23c93bb8de2da9f8c91935877ac4394c24 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 12 Feb 2020 13:44:38 +0000 Subject: [PATCH 06/31] Switch back to official fake-gcs-server image --- services/filestore/test/acceptance/deps/Dockerfile.fake-gcs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs index 694bcdac9e..f8313cbce0 100644 --- a/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs +++ b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs @@ -1,4 +1,4 @@ -FROM gh2k/fake-gcs-server +FROM fsouza/fake-gcs-server RUN apk add --update --no-cache curl COPY healthcheck.sh /healthcheck.sh HEALTHCHECK --interval=1s --timeout=1s --retries=30 CMD /healthcheck.sh http://localhost:9090 From d9c9d74994d6a660d49c2b644dbdf0067558f65a Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 13 Feb 2020 15:47:42 +0000 Subject: [PATCH 07/31] Remove unnecessary test for S3 file deletion S3 does not throw a not-found error when deleting a file that does not exist --- .../test/unit/js/S3PersistorTests.js | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/services/filestore/test/unit/js/S3PersistorTests.js b/services/filestore/test/unit/js/S3PersistorTests.js index ac104e36f2..aa9444428e 100644 --- a/services/filestore/test/unit/js/S3PersistorTests.js +++ b/services/filestore/test/unit/js/S3PersistorTests.js @@ -675,25 +675,6 @@ describe('S3PersistorTests', function() { }) }) }) - - describe('when the file does not exist', function() { - let error - - beforeEach(async function() { - S3Client.deleteObject = sinon.stub().returns({ - promise: sinon.stub().rejects(S3NotFoundError) - }) - try { - await S3Persistor.promises.deleteFile(bucket, key) - } catch (err) { - error = err - } - }) - - it('should throw a NotFoundError', function() { - expect(error).to.be.an.instanceOf(Errors.NotFoundError) - }) - }) }) describe('deleteDirectory', function() { From e58284aefeec7ded2d00764e7f0a0f434fc4a0ad Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 13 Feb 2020 16:55:01 +0000 Subject: [PATCH 08/31] Move base64/hex methods to PersistorHelper Also add some null-safety checks --- services/filestore/app/js/GcsPersistor.js | 12 ++++-------- services/filestore/app/js/PersistorHelper.js | 12 +++++++++++- services/filestore/app/js/S3Persistor.js | 6 +----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 350b0d451c..690df70252 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -10,16 +10,12 @@ const PersistorHelper = require('./PersistorHelper') const pipeline = promisify(Stream.pipeline) -function base64ToHex(base64) { - return Buffer.from(base64, 'base64').toString('hex') -} - // both of these settings will be null by default except for tests // that's OK - GCS uses the locally-configured service account by default const storage = new Storage(settings.filestore.gcs) // workaround for broken uploads with custom endpoints: // https://github.com/googleapis/nodejs-storage/issues/898 -if (settings.filestore.gcs.apiEndpoint) { +if (settings.filestore.gcs && settings.filestore.gcs.apiEndpoint) { storage.interceptors.push({ request: function(reqOpts) { const 
url = new URL(reqOpts.uri) @@ -95,7 +91,7 @@ async function sendStream(bucket, key, readStream, sourceMd5) { if (sourceMd5) { writeOptions.validation = 'md5' writeOptions.metadata = { - md5Hash: sourceMd5 + md5Hash: PersistorHelper.hexToBase64(sourceMd5) } } @@ -123,7 +119,7 @@ async function sendStream(bucket, key, readStream, sourceMd5) { } } -async function getFileStream(bucket, key, opts) { +async function getFileStream(bucket, key, opts = {}) { if (opts.end) { // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1 opts.end++ @@ -174,7 +170,7 @@ async function getFileMd5Hash(bucket, key) { .bucket(bucket) .file(key) .getMetadata() - return base64ToHex(metadata[0].md5Hash) + return PersistorHelper.base64ToHex(metadata[0].md5Hash) } catch (err) { throw PersistorHelper.wrapError( err, diff --git a/services/filestore/app/js/PersistorHelper.js b/services/filestore/app/js/PersistorHelper.js index ea8132a9c9..409f3182f1 100644 --- a/services/filestore/app/js/PersistorHelper.js +++ b/services/filestore/app/js/PersistorHelper.js @@ -12,7 +12,9 @@ module.exports = { verifyMd5, getMeteredStream, waitForStreamReady, - wrapError + wrapError, + hexToBase64, + base64ToHex } // returns a promise which resolves with the md5 hash of the stream @@ -103,3 +105,11 @@ function wrapError(error, message, params, ErrorType) { }).withCause(error) } } + +function base64ToHex(base64) { + return Buffer.from(base64, 'base64').toString('hex') +} + +function hexToBase64(hex) { + return Buffer.from(hex, 'hex').toString('base64') +} diff --git a/services/filestore/app/js/S3Persistor.js b/services/filestore/app/js/S3Persistor.js index fc505ccfbb..7b69ad5d26 100644 --- a/services/filestore/app/js/S3Persistor.js +++ b/services/filestore/app/js/S3Persistor.js @@ -46,10 +46,6 @@ const S3Persistor = { module.exports = S3Persistor -function hexToBase64(hex) { - return Buffer.from(hex, 'hex').toString('base64') -} - async function sendFile(bucketName, key, fsPath) { let readStream try { @@ -72,7 +68,7 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { let b64Hash if (sourceMd5) { - b64Hash = hexToBase64(sourceMd5) + b64Hash = PersistorHelper.hexToBase64(sourceMd5) } else { hashPromise = PersistorHelper.calculateStreamMd5(readStream) } From 12274e1427f31d09f11becd8ac0bc424d7093df3 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 13 Feb 2020 16:55:47 +0000 Subject: [PATCH 09/31] Add unit tests for GCS Persistor --- .../test/unit/js/GcsPersistorTests.js | 756 ++++++++++++++++++ 1 file changed, 756 insertions(+) create mode 100644 services/filestore/test/unit/js/GcsPersistorTests.js diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js new file mode 100644 index 0000000000..6264fad0ff --- /dev/null +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -0,0 +1,756 @@ +const sinon = require('sinon') +const chai = require('chai') +const { expect } = chai +const modulePath = '../../../app/js/GcsPersistor.js' +const SandboxedModule = require('sandboxed-module') + +const Errors = require('../../../app/js/Errors') + +describe('GcsPersistorTests', function() { + const filename = '/wombat/potato.tex' + const bucket = 'womBucket' + const key = 'monKey' + const destKey = 'donKey' + const objectSize = 5555 + const genericError = new Error('guru meditation error') + const filesSize = 33 + const md5 = 'ffffffff00000000ffffffff00000000' + const WriteStream = 'writeStream' + + let Metrics, + Logger, + Storage, + Fs, + 
NotFoundError, + Meter, + MeteredStream, + ReadStream, + Stream, + GcsBucket, + GcsFile, + GcsPersistor, + FileNotFoundError, + Hash, + settings, + crypto, + files + + beforeEach(function() { + settings = { + filestore: { + backend: 'gcs', + stores: { + user_files: 'user_files' + } + } + } + + files = [ + { + metadata: { size: 11, md5Hash: '/////wAAAAD/////AAAAAA==' }, + delete: sinon.stub() + }, + { + metadata: { size: 22, md5Hash: '/////wAAAAD/////AAAAAA==' }, + delete: sinon.stub() + } + ] + + ReadStream = { + pipe: sinon.stub().returns('readStream'), + on: sinon + .stub() + .withArgs('end') + .yields(), + removeListener: sinon.stub() + } + + Stream = { + pipeline: sinon.stub().yields() + } + + Metrics = { + count: sinon.stub() + } + + GcsFile = { + delete: sinon.stub().resolves(), + createReadStream: sinon.stub().returns(ReadStream), + getMetadata: sinon.stub().resolves([files[0].metadata]), + createWriteStream: sinon.stub().returns(WriteStream), + copy: sinon.stub().resolves(), + exists: sinon.stub().resolves([true]) + } + + GcsBucket = { + file: sinon.stub().returns(GcsFile), + getFiles: sinon.stub().resolves([files]) + } + + Storage = class { + constructor() { + this.interceptors = [] + } + } + Storage.prototype.bucket = sinon.stub().returns(GcsBucket) + + NotFoundError = new Error('File not found') + NotFoundError.code = 404 + + Fs = { + createReadStream: sinon.stub().returns(ReadStream) + } + + FileNotFoundError = new Error('File not found') + FileNotFoundError.code = 'ENOENT' + + MeteredStream = { + type: 'metered', + on: sinon.stub(), + bytes: objectSize + } + MeteredStream.on.withArgs('finish').yields() + MeteredStream.on.withArgs('readable').yields() + Meter = sinon.stub().returns(MeteredStream) + + Hash = { + end: sinon.stub(), + read: sinon.stub().returns(md5), + setEncoding: sinon.stub() + } + crypto = { + createHash: sinon.stub().returns(Hash) + } + + Logger = { + warn: sinon.stub() + } + + GcsPersistor = SandboxedModule.require(modulePath, { + requires: { + '@google-cloud/storage': { Storage }, + 'settings-sharelatex': settings, + 'logger-sharelatex': Logger, + './Errors': Errors, + fs: Fs, + 'stream-meter': Meter, + stream: Stream, + 'metrics-sharelatex': Metrics, + crypto + }, + globals: { console } + }) + }) + + describe('getFileStream', function() { + describe('when called with valid parameters', function() { + let stream + + beforeEach(async function() { + stream = await GcsPersistor.promises.getFileStream(bucket, key) + }) + + it('returns a metered stream', function() { + expect(stream).to.equal(MeteredStream) + }) + + it('fetches the right key from the right bucket', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.createReadStream).to.have.been.called + }) + + it('pipes the stream through the meter', function() { + expect(Stream.pipeline).to.have.been.calledWith( + ReadStream, + MeteredStream + ) + }) + + it('records an ingress metric', function() { + expect(Metrics.count).to.have.been.calledWith('gcs.ingress', objectSize) + }) + }) + + describe('when called with a byte range', function() { + let stream + + beforeEach(async function() { + stream = await GcsPersistor.promises.getFileStream(bucket, key, { + start: 5, + end: 10 + }) + }) + + it('returns a metered stream', function() { + expect(stream).to.equal(MeteredStream) + }) + + it('passes the byte range on to GCS', function() { + expect(GcsFile.createReadStream).to.have.been.calledWith({ + start: 5, + end: 11 
// we increment the end because Google's 'end' is exclusive
+        })
+      })
+    })
+
+    describe("when the file doesn't exist", function() {
+      let error, stream
+
+      beforeEach(async function() {
+        ReadStream.on = sinon.stub()
+        ReadStream.on.withArgs('error').yields(NotFoundError)
+        try {
+          stream = await GcsPersistor.promises.getFileStream(bucket, key)
+        } catch (err) {
+          error = err
+        }
+      })
+
+      it('does not return a stream', function() {
+        expect(stream).not.to.exist
+      })
+
+      it('throws a NotFoundError', function() {
+        expect(error).to.be.an.instanceOf(Errors.NotFoundError)
+      })
+
+      it('wraps the error', function() {
+        expect(error.cause).to.exist
+      })
+
+      it('stores the bucket and key in the error', function() {
+        expect(error.info).to.include({ bucket: bucket, key: key })
+      })
+    })
+
+    describe('when GCS encounters an unknown error', function() {
+      let error, stream
+
+      beforeEach(async function() {
+        ReadStream.on = sinon.stub()
+        ReadStream.on.withArgs('error').yields(genericError)
+        try {
+          stream = await GcsPersistor.promises.getFileStream(bucket, key)
+        } catch (err) {
+          error = err
+        }
+      })
+
+      it('does not return a stream', function() {
+        expect(stream).not.to.exist
+      })
+
+      it('throws a ReadError', function() {
+        expect(error).to.be.an.instanceOf(Errors.ReadError)
+      })
+
+      it('wraps the error', function() {
+        expect(error.cause).to.exist
+      })
+
+      it('stores the bucket and key in the error', function() {
+        expect(error.info).to.include({ bucket: bucket, key: key })
+      })
+    })
+  })
+
+  describe('getFileSize', function() {
+    describe('when called with valid parameters', function() {
+      let size
+
+      beforeEach(async function() {
+        size = await GcsPersistor.promises.getFileSize(bucket, key)
+      })
+
+      it('should return the object size', function() {
+        expect(size).to.equal(files[0].metadata.size)
+      })
+
+      it('should pass the bucket and key to GCS', function() {
+        expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
+        expect(GcsBucket.file).to.have.been.calledWith(key)
+        expect(GcsFile.getMetadata).to.have.been.called
+      })
+    })
+
+    describe('when the object is not found', function() {
+      let error
+
+      beforeEach(async function() {
+        GcsFile.getMetadata = sinon.stub().rejects(NotFoundError)
+        try {
+          await GcsPersistor.promises.getFileSize(bucket, key)
+        } catch (err) {
+          error = err
+        }
+      })
+
+      it('should return a NotFoundError', function() {
+        expect(error).to.be.an.instanceOf(Errors.NotFoundError)
+      })
+
+      it('should wrap the error', function() {
+        expect(error.cause).to.equal(NotFoundError)
+      })
+    })
+
+    describe('when GCS returns an error', function() {
+      let error
+
+      beforeEach(async function() {
+        GcsFile.getMetadata = sinon.stub().rejects(genericError)
+        try {
+          await GcsPersistor.promises.getFileSize(bucket, key)
+        } catch (err) {
+          error = err
+        }
+      })
+
+      it('should return a ReadError', function() {
+        expect(error).to.be.an.instanceOf(Errors.ReadError)
+      })
+
+      it('should wrap the error', function() {
+        expect(error.cause).to.equal(genericError)
+      })
+    })
+  })
+
+  describe('sendStream', function() {
+    describe('with valid parameters', function() {
+      beforeEach(async function() {
+        return GcsPersistor.promises.sendStream(bucket, key, ReadStream)
+      })
+
+      it('should upload the stream', function() {
+        expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
+        expect(GcsBucket.file).to.have.been.calledWith(key)
+        expect(GcsFile.createWriteStream).to.have.been.called
+      })
+
+      it('should not try to create a resumable upload', function() {
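+        // a resumable upload would first create an upload session with an
+        // extra request; `resumable: false` (set in GcsPersistor) skips that,
+        // which Google recommends for smaller files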
expect(GcsFile.createWriteStream).to.have.been.calledWith({ + resumable: false + }) + }) + + it('should meter the stream', function() { + expect(Stream.pipeline).to.have.been.calledWith( + ReadStream, + MeteredStream + ) + }) + + it('should pipe the metered stream to GCS', function() { + expect(Stream.pipeline).to.have.been.calledWith( + MeteredStream, + WriteStream + ) + }) + + it('should record an egress metric', function() { + expect(Metrics.count).to.have.been.calledWith('gcs.egress', objectSize) + }) + + it('calculates the md5 hash of the file', function() { + expect(Stream.pipeline).to.have.been.calledWith(ReadStream, Hash) + }) + }) + + describe('when a hash is supplied', function() { + beforeEach(async function() { + return GcsPersistor.promises.sendStream( + bucket, + key, + ReadStream, + 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb' + ) + }) + + it('should not calculate the md5 hash of the file', function() { + expect(Stream.pipeline).not.to.have.been.calledWith( + sinon.match.any, + Hash + ) + }) + + it('sends the hash in base64', function() { + expect(GcsFile.createWriteStream).to.have.been.calledWith({ + validation: 'md5', + metadata: { + md5Hash: 'qqqqqru7u7uqqqqqu7u7uw==' + }, + resumable: false + }) + }) + + it('does not fetch the md5 hash of the uploaded file', function() { + expect(GcsFile.getMetadata).not.to.have.been.called + }) + }) + + describe('when the upload fails', function() { + let error + beforeEach(async function() { + Stream.pipeline + .withArgs(MeteredStream, WriteStream, sinon.match.any) + .yields(genericError) + try { + await GcsPersistor.promises.sendStream(bucket, key, ReadStream) + } catch (err) { + error = err + } + }) + + it('throws a WriteError', function() { + expect(error).to.be.an.instanceOf(Errors.WriteError) + }) + + it('wraps the error', function() { + expect(error.cause).to.equal(genericError) + }) + }) + }) + + describe('sendFile', function() { + describe('with valid parameters', function() { + beforeEach(async function() { + return GcsPersistor.promises.sendFile(bucket, key, filename) + }) + + it('should create a read stream for the file', function() { + expect(Fs.createReadStream).to.have.been.calledWith(filename) + }) + + it('should create a write stream', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.createWriteStream).to.have.been.called + }) + + it('should upload the stream via the meter', function() { + expect(Stream.pipeline).to.have.been.calledWith( + ReadStream, + MeteredStream + ) + expect(Stream.pipeline).to.have.been.calledWith( + MeteredStream, + WriteStream + ) + }) + }) + + describe('when the file does not exist', function() { + let error + + beforeEach(async function() { + Fs.createReadStream = sinon.stub().throws(FileNotFoundError) + try { + await GcsPersistor.promises.sendFile(bucket, key, filename) + } catch (err) { + error = err + } + }) + + it('returns a NotFoundError', function() { + expect(error).to.be.an.instanceOf(Errors.NotFoundError) + }) + + it('wraps the error', function() { + expect(error.cause).to.equal(FileNotFoundError) + }) + }) + + describe('when reading the file throws an error', function() { + let error + + beforeEach(async function() { + Fs.createReadStream = sinon.stub().throws(genericError) + try { + await GcsPersistor.promises.sendFile(bucket, key, filename) + } catch (err) { + error = err + } + }) + + it('returns a ReadError', function() { + expect(error).to.be.an.instanceOf(Errors.ReadError) + }) + + 
it('wraps the error', function() { + expect(error.cause).to.equal(genericError) + }) + }) + }) + + describe('copyFile', function() { + const DestinationFile = 'destFile' + + beforeEach(function() { + GcsBucket.file.withArgs(destKey).returns(DestinationFile) + }) + + describe('with valid parameters', function() { + beforeEach(async function() { + return GcsPersistor.promises.copyFile(bucket, key, destKey) + }) + + it('should copy the object', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.copy).to.have.been.calledWith(DestinationFile) + }) + }) + + describe('when the file does not exist', function() { + let error + + beforeEach(async function() { + GcsFile.copy = sinon.stub().rejects(NotFoundError) + try { + await GcsPersistor.promises.copyFile(bucket, key, destKey) + } catch (err) { + error = err + } + }) + + it('should throw a NotFoundError', function() { + expect(error).to.be.an.instanceOf(Errors.NotFoundError) + }) + }) + }) + + describe('deleteFile', function() { + describe('with valid parameters', function() { + beforeEach(async function() { + return GcsPersistor.promises.deleteFile(bucket, key) + }) + + it('should delete the object', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.delete).to.have.been.called + }) + }) + + describe('when the file does not exist', function() { + let error + + beforeEach(async function() { + GcsFile.delete = sinon.stub().rejects(NotFoundError) + try { + await GcsPersistor.promises.deleteFile(bucket, key) + } catch (err) { + error = err + } + }) + + it('should not throw an error', function() { + expect(error).not.to.exist + }) + }) + }) + + describe('deleteDirectory', function() { + describe('with valid parameters', function() { + beforeEach(async function() { + return GcsPersistor.promises.deleteDirectory(bucket, key) + }) + + it('should list the objects in the directory', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.getFiles).to.have.been.calledWith({ + directory: key + }) + }) + + it('should delete the objects', function() { + expect(files[0].delete).to.have.been.called + expect(files[1].delete).to.have.been.called + }) + }) + + describe('when there are no files', function() { + beforeEach(async function() { + GcsBucket.getFiles.resolves([[]]) + return GcsPersistor.promises.deleteDirectory(bucket, key) + }) + + it('should list the objects in the directory', function() { + expect(GcsBucket.getFiles).to.have.been.calledWith({ + directory: key + }) + }) + + it('should not try to delete any objects', function() { + expect(files[0].delete).not.to.have.been.called + expect(files[1].delete).not.to.have.been.called + }) + }) + + describe('when there is an error listing the objects', function() { + let error + + beforeEach(async function() { + GcsBucket.getFiles = sinon.stub().rejects(genericError) + try { + await GcsPersistor.promises.deleteDirectory(bucket, key) + } catch (err) { + error = err + } + }) + + it('should generate a ReadError', function() { + expect(error).to.be.an.instanceOf(Errors.ReadError) + }) + + it('should wrap the error', function() { + expect(error.cause).to.equal(genericError) + }) + + it('should not try to delete any objects', function() { + expect(files[0].delete).not.to.have.been.called + }) + }) + + describe('when there is an error deleting the objects', function() { + let 
error + + beforeEach(async function() { + files[0].delete = sinon.stub().rejects(genericError) + try { + await GcsPersistor.promises.deleteDirectory(bucket, key) + } catch (err) { + error = err + } + }) + + it('should generate a WriteError', function() { + expect(error).to.be.an.instanceOf(Errors.WriteError) + }) + + it('should wrap the error', function() { + expect(error.cause).to.equal(genericError) + }) + }) + }) + + describe('directorySize', function() { + describe('with valid parameters', function() { + let size + + beforeEach(async function() { + size = await GcsPersistor.promises.directorySize(bucket, key) + }) + + it('should list the objects in the directory', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.getFiles).to.have.been.calledWith({ directory: key }) + }) + + it('should return the directory size', function() { + expect(size).to.equal(filesSize) + }) + }) + + describe('when there are no files', function() { + let size + + beforeEach(async function() { + GcsBucket.getFiles.resolves([[]]) + size = await GcsPersistor.promises.directorySize(bucket, key) + }) + + it('should list the objects in the directory', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.getFiles).to.have.been.calledWith({ directory: key }) + }) + + it('should return zero', function() { + expect(size).to.equal(0) + }) + }) + + describe('when there is an error listing the objects', function() { + let error + + beforeEach(async function() { + GcsBucket.getFiles.rejects(genericError) + try { + await GcsPersistor.promises.directorySize(bucket, key) + } catch (err) { + error = err + } + }) + + it('should generate a ReadError', function() { + expect(error).to.be.an.instanceOf(Errors.ReadError) + }) + + it('should wrap the error', function() { + expect(error.cause).to.equal(genericError) + }) + }) + }) + + describe('checkIfFileExists', function() { + describe('when the file exists', function() { + let exists + + beforeEach(async function() { + exists = await GcsPersistor.promises.checkIfFileExists(bucket, key) + }) + + it('should ask the file if it exists', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.exists).to.have.been.called + }) + + it('should return that the file exists', function() { + expect(exists).to.equal(true) + }) + }) + + describe('when the file does not exist', function() { + let exists + + beforeEach(async function() { + GcsFile.exists = sinon.stub().resolves([false]) + exists = await GcsPersistor.promises.checkIfFileExists(bucket, key) + }) + + it('should get the object header', function() { + expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) + expect(GcsBucket.file).to.have.been.calledWith(key) + expect(GcsFile.exists).to.have.been.called + }) + + it('should return that the file does not exist', function() { + expect(exists).to.equal(false) + }) + }) + + describe('when there is an error', function() { + let error + + beforeEach(async function() { + GcsFile.exists = sinon.stub().rejects(genericError) + try { + await GcsPersistor.promises.checkIfFileExists(bucket, key) + } catch (err) { + error = err + } + }) + + it('should generate a ReadError', function() { + expect(error).to.be.an.instanceOf(Errors.ReadError) + }) + + it('should wrap the error', function() { + expect(error.cause).to.equal(genericError) + }) + }) + }) +}) From 6979b8638aa899f055d34bb0d0c00a602fe4a386 Mon Sep 17 
00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 15:42:16 +0000 Subject: [PATCH 10/31] Add 'Buffer' global for GCS unit tests --- services/filestore/test/unit/js/GcsPersistorTests.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index 6264fad0ff..1a22971ddd 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -138,7 +138,7 @@ describe('GcsPersistorTests', function() { 'metrics-sharelatex': Metrics, crypto }, - globals: { console } + globals: { console, Buffer } }) }) From 76243fd75aff9e7409939299fc067b2ae33db134 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 16:04:26 +0000 Subject: [PATCH 11/31] 'bucket' -> 'bucketName' in GCS Persistor --- services/filestore/app/js/GcsPersistor.js | 70 ++++++++++--------- .../test/unit/js/GcsPersistorTests.js | 4 +- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 690df70252..dd2c44d5ba 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -53,7 +53,7 @@ const GcsPersistor = { module.exports = GcsPersistor -async function sendFile(bucket, key, fsPath) { +async function sendFile(bucketName, key, fsPath) { let readStream try { readStream = fs.createReadStream(fsPath) @@ -61,14 +61,14 @@ async function sendFile(bucket, key, fsPath) { throw PersistorHelper.wrapError( err, 'error reading file from disk', - { bucketName: bucket, key, fsPath }, + { bucketName, key, fsPath }, ReadError ) } - return sendStream(bucket, key, readStream) + return sendStream(bucketName, key, readStream) } -async function sendStream(bucket, key, readStream, sourceMd5) { +async function sendStream(bucketName, key, readStream, sourceMd5) { try { let hashPromise @@ -96,7 +96,7 @@ async function sendStream(bucket, key, readStream, sourceMd5) { } const uploadStream = storage - .bucket(bucket) + .bucket(bucketName) .file(key) .createWriteStream(writeOptions) @@ -107,25 +107,25 @@ async function sendStream(bucket, key, readStream, sourceMd5) { if (hashPromise) { sourceMd5 = await hashPromise // throws on mismatch - await PersistorHelper.verifyMd5(GcsPersistor, bucket, key, sourceMd5) + await PersistorHelper.verifyMd5(GcsPersistor, bucketName, key, sourceMd5) } } catch (err) { throw PersistorHelper.wrapError( err, 'upload to GCS failed', - { bucket, key }, + { bucketName, key }, WriteError ) } } -async function getFileStream(bucket, key, opts = {}) { +async function getFileStream(bucketName, key, opts = {}) { if (opts.end) { // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1 opts.end++ } const stream = storage - .bucket(bucket) + .bucket(bucketName) .file(key) .createReadStream(opts) @@ -141,16 +141,16 @@ async function getFileStream(bucket, key, opts = {}) { throw PersistorHelper.wrapError( err, 'error reading file from GCS', - { bucket, key, opts }, + { bucketName, key, opts }, ReadError ) } } -async function getFileSize(bucket, key) { +async function getFileSize(bucketName, key) { try { const metadata = await storage - .bucket(bucket) + .bucket(bucketName) .file(key) .getMetadata() return metadata[0].size @@ -158,16 +158,16 @@ async function getFileSize(bucket, key) { throw PersistorHelper.wrapError( err, 'error getting size of GCS object', - { bucket, key }, + { bucketName, 
key }, ReadError ) } } -async function getFileMd5Hash(bucket, key) { +async function getFileMd5Hash(bucketName, key) { try { const metadata = await storage - .bucket(bucket) + .bucket(bucketName) .file(key) .getMetadata() return PersistorHelper.base64ToHex(metadata[0].md5Hash) @@ -175,23 +175,23 @@ async function getFileMd5Hash(bucket, key) { throw PersistorHelper.wrapError( err, 'error getting hash of GCS object', - { bucket, key }, + { bucketName, key }, ReadError ) } } -async function deleteFile(bucket, key) { +async function deleteFile(bucketName, key) { try { await storage - .bucket(bucket) + .bucket(bucketName) .file(key) .delete() } catch (err) { const error = PersistorHelper.wrapError( err, 'error deleting GCS object', - { bucket, key }, + { bucketName, key }, WriteError ) if (!(error instanceof NotFoundError)) { @@ -200,17 +200,19 @@ async function deleteFile(bucket, key) { } } -async function deleteDirectory(bucket, key) { +async function deleteDirectory(bucketName, key) { let files try { - const response = await storage.bucket(bucket).getFiles({ directory: key }) + const response = await storage + .bucket(bucketName) + .getFiles({ directory: key }) files = response[0] } catch (err) { const error = PersistorHelper.wrapError( err, 'failed to list objects in GCS', - { bucket, key }, + { bucketName, key }, ReadError ) if (error instanceof NotFoundError) { @@ -226,7 +228,7 @@ async function deleteDirectory(bucket, key) { const error = PersistorHelper.wrapError( err, 'failed to delete object in GCS', - { bucket, key }, + { bucketName, key }, WriteError ) if (!(error instanceof NotFoundError)) { @@ -236,17 +238,19 @@ async function deleteDirectory(bucket, key) { } } -async function directorySize(bucket, key) { +async function directorySize(bucketName, key) { let files try { - const response = await storage.bucket(bucket).getFiles({ directory: key }) + const response = await storage + .bucket(bucketName) + .getFiles({ directory: key }) files = response[0] } catch (err) { throw PersistorHelper.wrapError( err, 'failed to list objects in GCS', - { bucket, key }, + { bucketName, key }, ReadError ) } @@ -254,10 +258,10 @@ async function directorySize(bucket, key) { return files.reduce((acc, file) => Number(file.metadata.size) + acc, 0) } -async function checkIfFileExists(bucket, key) { +async function checkIfFileExists(bucketName, key) { try { const response = await storage - .bucket(bucket) + .bucket(bucketName) .file(key) .exists() return response[0] @@ -265,16 +269,16 @@ async function checkIfFileExists(bucket, key) { throw PersistorHelper.wrapError( err, 'error checking if file exists in GCS', - { bucket, key }, + { bucketName, key }, ReadError ) } } -async function copyFile(bucket, sourceKey, destKey) { +async function copyFile(bucketName, sourceKey, destKey) { try { - const src = storage.bucket(bucket).file(sourceKey) - const dest = storage.bucket(bucket).file(destKey) + const src = storage.bucket(bucketName).file(sourceKey) + const dest = storage.bucket(bucketName).file(destKey) await src.copy(dest) } catch (err) { // fake-gcs-server has a bug that returns an invalid response when the file does not exist @@ -284,7 +288,7 @@ async function copyFile(bucket, sourceKey, destKey) { throw PersistorHelper.wrapError( err, 'failed to copy file in GCS', - { bucket, sourceKey, destKey }, + { bucketName, sourceKey, destKey }, WriteError ) } diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index 1a22971ddd..97b049e833 100644 
--- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -220,7 +220,7 @@ describe('GcsPersistorTests', function() { }) it('stores the bucket and key in the error', function() { - expect(error.info).to.include({ bucket: bucket, key: key }) + expect(error.info).to.include({ bucketName: bucket, key: key }) }) }) @@ -250,7 +250,7 @@ describe('GcsPersistorTests', function() { }) it('stores the bucket and key in the error', function() { - expect(error.info).to.include({ bucket: bucket, key: key }) + expect(error.info).to.include({ bucketName: bucket, key: key }) }) }) }) From def383574ea03f8e2899d0825071c25d63b4a7ec Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 16:17:36 +0000 Subject: [PATCH 12/31] Simplify getMeteredStream to record metric directly --- services/filestore/app/js/GcsPersistor.js | 10 ++-------- services/filestore/app/js/PersistorHelper.js | 9 +++++---- services/filestore/app/js/S3Persistor.js | 14 ++------------ .../filestore/test/unit/js/FSPersistorTests.js | 3 ++- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index dd2c44d5ba..9ddf608e90 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -1,5 +1,4 @@ const settings = require('settings-sharelatex') -const metrics = require('metrics-sharelatex') const fs = require('fs') const { promisify } = require('util') const Stream = require('stream') @@ -79,9 +78,7 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { const meteredStream = PersistorHelper.getMeteredStream( readStream, - (_, byteCount) => { - metrics.count('gcs.egress', byteCount) - } + 'gcs.egress' ) const writeOptions = { @@ -129,10 +126,7 @@ async function getFileStream(bucketName, key, opts = {}) { .file(key) .createReadStream(opts) - const meteredStream = PersistorHelper.getMeteredStream(stream, (_, bytes) => { - // ignore the error parameter and just log the byte count - metrics.count('gcs.ingress', bytes) - }) + const meteredStream = PersistorHelper.getMeteredStream(stream, 'gcs.ingress') try { await PersistorHelper.waitForStreamReady(stream) diff --git a/services/filestore/app/js/PersistorHelper.js b/services/filestore/app/js/PersistorHelper.js index 409f3182f1..a19311e889 100644 --- a/services/filestore/app/js/PersistorHelper.js +++ b/services/filestore/app/js/PersistorHelper.js @@ -1,4 +1,5 @@ const crypto = require('crypto') +const metrics = require('metrics-sharelatex') const meter = require('stream-meter') const Stream = require('stream') const logger = require('logger-sharelatex') @@ -54,16 +55,16 @@ async function verifyMd5(persistor, bucket, key, sourceMd5, destMd5 = null) { // returns the next stream in the pipeline, and calls the callback with the byte count // when the stream finishes or receives an error -function getMeteredStream(stream, callback) { +function getMeteredStream(stream, metricName) { const meteredStream = meter() pipeline(stream, meteredStream) .then(() => { - callback(null, meteredStream.bytes) + metrics.count(metricName, meteredStream.bytes) }) - .catch(err => { + .catch(() => { // on error, just send how many bytes we received before the stream stopped - callback(err, meteredStream.bytes) + metrics.count(metricName, meteredStream.bytes) }) return meteredStream diff --git a/services/filestore/app/js/S3Persistor.js b/services/filestore/app/js/S3Persistor.js index 
7b69ad5d26..e50a9e3030 100644 --- a/services/filestore/app/js/S3Persistor.js +++ b/services/filestore/app/js/S3Persistor.js @@ -4,7 +4,6 @@ http.globalAgent.maxSockets = 300 https.globalAgent.maxSockets = 300 const settings = require('settings-sharelatex') -const metrics = require('metrics-sharelatex') const PersistorHelper = require('./PersistorHelper') @@ -75,10 +74,7 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { const meteredStream = PersistorHelper.getMeteredStream( readStream, - (_, byteCount) => { - // ignore the error parameter and just log the byte count - metrics.count('s3.egress', byteCount) - } + 's3.egress' ) // if we have an md5 hash, pass this to S3 to verify the upload @@ -143,13 +139,7 @@ async function getFileStream(bucketName, key, opts) { .getObject(params) .createReadStream() - const meteredStream = PersistorHelper.getMeteredStream( - stream, - (_, byteCount) => { - // ignore the error parameter and just log the byte count - metrics.count('s3.ingress', byteCount) - } - ) + const meteredStream = PersistorHelper.getMeteredStream(stream, 's3.ingress') try { await PersistorHelper.waitForStreamReady(stream) diff --git a/services/filestore/test/unit/js/FSPersistorTests.js b/services/filestore/test/unit/js/FSPersistorTests.js index 0a09869bc0..4dd5a2fa11 100644 --- a/services/filestore/test/unit/js/FSPersistorTests.js +++ b/services/filestore/test/unit/js/FSPersistorTests.js @@ -73,7 +73,8 @@ describe('FSPersistorTests', function() { crypto, // imported by PersistorHelper but otherwise unused here 'stream-meter': {}, - 'logger-sharelatex': {} + 'logger-sharelatex': {}, + 'metrics-sharelatex': {} }, globals: { console } }) From a7198764cbeb630513a67673b3f15cfc92f9a38b Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 16:25:11 +0000 Subject: [PATCH 13/31] Improve/add some comments for clarity --- services/filestore/app/js/GcsPersistor.js | 10 +++++++--- services/filestore/app/js/S3Persistor.js | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 9ddf608e90..61b3e814b0 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -78,11 +78,12 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { const meteredStream = PersistorHelper.getMeteredStream( readStream, - 'gcs.egress' + 'gcs.egress' // egress from us to gcs ) const writeOptions = { - resumable: false // recommended by Google + // disabling of resumable uploads is recommended by Google: + resumable: false } if (sourceMd5) { @@ -126,7 +127,10 @@ async function getFileStream(bucketName, key, opts = {}) { .file(key) .createReadStream(opts) - const meteredStream = PersistorHelper.getMeteredStream(stream, 'gcs.ingress') + const meteredStream = PersistorHelper.getMeteredStream( + stream, + 'gcs.ingress' // ingress to us from gcs + ) try { await PersistorHelper.waitForStreamReady(stream) diff --git a/services/filestore/app/js/S3Persistor.js b/services/filestore/app/js/S3Persistor.js index e50a9e3030..4403386716 100644 --- a/services/filestore/app/js/S3Persistor.js +++ b/services/filestore/app/js/S3Persistor.js @@ -74,7 +74,7 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { const meteredStream = PersistorHelper.getMeteredStream( readStream, - 's3.egress' + 's3.egress' // egress from us to s3 ) // if we have an md5 hash, pass this to S3 to verify the upload @@ -139,7 +139,10 @@ async function 
getFileStream(bucketName, key, opts) { .getObject(params) .createReadStream() - const meteredStream = PersistorHelper.getMeteredStream(stream, 's3.ingress') + const meteredStream = PersistorHelper.getMeteredStream( + stream, + 's3.ingress' // ingress to us from s3 + ) try { await PersistorHelper.waitForStreamReady(stream) From 30114cd79b237ddfdb2d6a986a7a60c57dc8798c Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 16:38:05 +0000 Subject: [PATCH 14/31] Remove unnecessary try/catch around 'createReadStream' --- services/filestore/app/js/GcsPersistor.js | 13 +----- services/filestore/app/js/S3Persistor.js | 13 +----- .../test/unit/js/GcsPersistorTests.js | 42 ------------------- .../test/unit/js/S3PersistorTests.js | 42 ------------------- 4 files changed, 2 insertions(+), 108 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 61b3e814b0..50cac9bda8 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -53,18 +53,7 @@ const GcsPersistor = { module.exports = GcsPersistor async function sendFile(bucketName, key, fsPath) { - let readStream - try { - readStream = fs.createReadStream(fsPath) - } catch (err) { - throw PersistorHelper.wrapError( - err, - 'error reading file from disk', - { bucketName, key, fsPath }, - ReadError - ) - } - return sendStream(bucketName, key, readStream) + return sendStream(bucketName, key, fs.createReadStream(fsPath)) } async function sendStream(bucketName, key, readStream, sourceMd5) { diff --git a/services/filestore/app/js/S3Persistor.js b/services/filestore/app/js/S3Persistor.js index 4403386716..1b92a61ae6 100644 --- a/services/filestore/app/js/S3Persistor.js +++ b/services/filestore/app/js/S3Persistor.js @@ -46,18 +46,7 @@ const S3Persistor = { module.exports = S3Persistor async function sendFile(bucketName, key, fsPath) { - let readStream - try { - readStream = fs.createReadStream(fsPath) - } catch (err) { - throw PersistorHelper.wrapError( - err, - 'error reading file from disk', - { bucketName, key, fsPath }, - ReadError - ) - } - return sendStream(bucketName, key, readStream) + return sendStream(bucketName, key, fs.createReadStream(fsPath)) } async function sendStream(bucketName, key, readStream, sourceMd5) { diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index 97b049e833..bdd7e1d562 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -440,48 +440,6 @@ describe('GcsPersistorTests', function() { ) }) }) - - describe('when the file does not exist', function() { - let error - - beforeEach(async function() { - Fs.createReadStream = sinon.stub().throws(FileNotFoundError) - try { - await GcsPersistor.promises.sendFile(bucket, key, filename) - } catch (err) { - error = err - } - }) - - it('returns a NotFoundError', function() { - expect(error).to.be.an.instanceOf(Errors.NotFoundError) - }) - - it('wraps the error', function() { - expect(error.cause).to.equal(FileNotFoundError) - }) - }) - - describe('when reading the file throws an error', function() { - let error - - beforeEach(async function() { - Fs.createReadStream = sinon.stub().throws(genericError) - try { - await GcsPersistor.promises.sendFile(bucket, key, filename) - } catch (err) { - error = err - } - }) - - it('returns a ReadError', function() { - expect(error).to.be.an.instanceOf(Errors.ReadError) - }) - - it('wraps the 
error', function() { - expect(error.cause).to.equal(genericError) - }) - }) }) describe('copyFile', function() { diff --git a/services/filestore/test/unit/js/S3PersistorTests.js b/services/filestore/test/unit/js/S3PersistorTests.js index aa9444428e..484a0209a8 100644 --- a/services/filestore/test/unit/js/S3PersistorTests.js +++ b/services/filestore/test/unit/js/S3PersistorTests.js @@ -583,48 +583,6 @@ describe('S3PersistorTests', function() { }) }) }) - - describe('when the file does not exist', function() { - let error - - beforeEach(async function() { - Fs.createReadStream = sinon.stub().throws(FileNotFoundError) - try { - await S3Persistor.promises.sendFile(bucket, key, filename) - } catch (err) { - error = err - } - }) - - it('returns a NotFoundError', function() { - expect(error).to.be.an.instanceOf(Errors.NotFoundError) - }) - - it('wraps the error', function() { - expect(error.cause).to.equal(FileNotFoundError) - }) - }) - - describe('when reading the file throws an error', function() { - let error - - beforeEach(async function() { - Fs.createReadStream = sinon.stub().throws(genericError) - try { - await S3Persistor.promises.sendFile(bucket, key, filename) - } catch (err) { - error = err - } - }) - - it('returns a ReadError', function() { - expect(error).to.be.an.instanceOf(Errors.ReadError) - }) - - it('wraps the error', function() { - expect(error.cause).to.equal(genericError) - }) - }) }) describe('copyFile', function() { From 3bb956b38e74bfd39846905839aacb0061e9c4af Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 4 Mar 2020 17:01:20 +0000 Subject: [PATCH 15/31] Use http for the fake GCS server --- services/filestore/app/js/GcsPersistor.js | 3 +++ services/filestore/docker-compose.ci.yml | 4 ++-- services/filestore/docker-compose.yml | 4 ++-- services/filestore/test/acceptance/deps/Dockerfile.fake-gcs | 2 +- services/filestore/test/acceptance/js/TestConfig.js | 1 + 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 50cac9bda8..d81fc40b03 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -19,6 +19,9 @@ if (settings.filestore.gcs && settings.filestore.gcs.apiEndpoint) { request: function(reqOpts) { const url = new URL(reqOpts.uri) url.host = settings.filestore.gcs.apiEndpoint + if (settings.filestore.gcs.apiScheme) { + url.protocol = settings.filestore.gcs.apiScheme + } reqOpts.uri = url.toString() return reqOpts } diff --git a/services/filestore/docker-compose.ci.yml b/services/filestore/docker-compose.ci.yml index fe4eaa35fd..d3accee799 100644 --- a/services/filestore/docker-compose.ci.yml +++ b/services/filestore/docker-compose.ci.yml @@ -34,11 +34,11 @@ services: AWS_SECRET_ACCESS_KEY: fake AWS_S3_PATH_STYLE: 'true' GCS_API_ENDPOINT: gcs:9090 + GCS_API_SCHEME: http GCS_USER_FILES_BUCKET_NAME: fake_userfiles GCS_TEMPLATE_FILES_BUCKET_NAME: fake_templatefiles GCS_PUBLIC_FILES_BUCKET_NAME: fake_publicfiles - NODE_TLS_REJECT_UNAUTHORIZED: 0 - STORAGE_EMULATOR_HOST: https://gcs:9090/storage/v1 + STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1 depends_on: s3: condition: service_healthy diff --git a/services/filestore/docker-compose.yml b/services/filestore/docker-compose.yml index d904574f84..54ef9c00c9 100644 --- a/services/filestore/docker-compose.yml +++ b/services/filestore/docker-compose.yml @@ -44,11 +44,11 @@ services: AWS_ACCESS_KEY_ID: fake AWS_SECRET_ACCESS_KEY: fake GCS_API_ENDPOINT: gcs:9090 + 
GCS_API_SCHEME: http GCS_USER_FILES_BUCKET_NAME: fake_userfiles GCS_TEMPLATE_FILES_BUCKET_NAME: fake_templatefiles GCS_PUBLIC_FILES_BUCKET_NAME: fake_publicfiles - NODE_TLS_REJECT_UNAUTHORIZED: 0 - STORAGE_EMULATOR_HOST: https://gcs:9090/storage/v1 + STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1 user: node depends_on: s3: diff --git a/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs index f8313cbce0..6acb2d63b4 100644 --- a/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs +++ b/services/filestore/test/acceptance/deps/Dockerfile.fake-gcs @@ -2,4 +2,4 @@ FROM fsouza/fake-gcs-server RUN apk add --update --no-cache curl COPY healthcheck.sh /healthcheck.sh HEALTHCHECK --interval=1s --timeout=1s --retries=30 CMD /healthcheck.sh http://localhost:9090 -CMD ["--port=9090"] +CMD ["--port=9090", "--scheme=http"] diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js index e673ace71c..fd7d0f034c 100644 --- a/services/filestore/test/acceptance/js/TestConfig.js +++ b/services/filestore/test/acceptance/js/TestConfig.js @@ -22,6 +22,7 @@ function s3Stores() { function gcsConfig() { return { apiEndpoint: process.env.GCS_API_ENDPOINT, + apiScheme: process.env.GCS_API_SCHEME, projectId: 'fake' } } From 460dd96b179c9d6e8ca955f10d877dd9830275e8 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 5 Mar 2020 13:45:46 +0000 Subject: [PATCH 16/31] Cosmetic clean-up of GCS Persistor & tests --- services/filestore/app/js/GcsPersistor.js | 20 ++++++++--------- .../test/unit/js/GcsPersistorTests.js | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index d81fc40b03..f4702e06a4 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -139,11 +139,11 @@ async function getFileStream(bucketName, key, opts = {}) { async function getFileSize(bucketName, key) { try { - const metadata = await storage + const [metadata] = await storage .bucket(bucketName) .file(key) .getMetadata() - return metadata[0].size + return metadata.size } catch (err) { throw PersistorHelper.wrapError( err, @@ -156,11 +156,11 @@ async function getFileSize(bucketName, key) { async function getFileMd5Hash(bucketName, key) { try { - const metadata = await storage + const [metadata] = await storage .bucket(bucketName) .file(key) .getMetadata() - return PersistorHelper.base64ToHex(metadata[0].md5Hash) + return PersistorHelper.base64ToHex(metadata.md5Hash) } catch (err) { throw PersistorHelper.wrapError( err, @@ -194,10 +194,10 @@ async function deleteDirectory(bucketName, key) { let files try { - const response = await storage + const [response] = await storage .bucket(bucketName) .getFiles({ directory: key }) - files = response[0] + files = response } catch (err) { const error = PersistorHelper.wrapError( err, @@ -232,10 +232,10 @@ async function directorySize(bucketName, key) { let files try { - const response = await storage + const [response] = await storage .bucket(bucketName) .getFiles({ directory: key }) - files = response[0] + files = response } catch (err) { throw PersistorHelper.wrapError( err, @@ -250,11 +250,11 @@ async function directorySize(bucketName, key) { async function checkIfFileExists(bucketName, key) { try { - const response = await storage + const [response] = await storage .bucket(bucketName) .file(key) 
.exists() - return response[0] + return response } catch (err) { throw PersistorHelper.wrapError( err, diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index bdd7e1d562..ec071192e2 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -21,7 +21,7 @@ describe('GcsPersistorTests', function() { Logger, Storage, Fs, - NotFoundError, + GcsNotFoundError, Meter, MeteredStream, ReadStream, @@ -94,8 +94,8 @@ describe('GcsPersistorTests', function() { } Storage.prototype.bucket = sinon.stub().returns(GcsBucket) - NotFoundError = new Error('File not found') - NotFoundError.code = 404 + GcsNotFoundError = new Error('File not found') + GcsNotFoundError.code = 404 Fs = { createReadStream: sinon.stub().returns(ReadStream) @@ -199,7 +199,7 @@ describe('GcsPersistorTests', function() { beforeEach(async function() { ReadStream.on = sinon.stub() - ReadStream.on.withArgs('error').yields(NotFoundError) + ReadStream.on.withArgs('error').yields(GcsNotFoundError) try { stream = await GcsPersistor.promises.getFileStream(bucket, key) } catch (err) { @@ -278,7 +278,7 @@ describe('GcsPersistorTests', function() { let error beforeEach(async function() { - GcsFile.getMetadata = sinon.stub().rejects(NotFoundError) + GcsFile.getMetadata = sinon.stub().rejects(GcsNotFoundError) try { await GcsPersistor.promises.getFileSize(bucket, key) } catch (err) { @@ -291,7 +291,7 @@ describe('GcsPersistorTests', function() { }) it('should wrap the error', function() { - expect(error.cause).to.equal(NotFoundError) + expect(error.cause).to.equal(GcsNotFoundError) }) }) @@ -443,10 +443,10 @@ describe('GcsPersistorTests', function() { }) describe('copyFile', function() { - const DestinationFile = 'destFile' + const destinationFile = 'destFile' beforeEach(function() { - GcsBucket.file.withArgs(destKey).returns(DestinationFile) + GcsBucket.file.withArgs(destKey).returns(destinationFile) }) describe('with valid parameters', function() { @@ -457,7 +457,7 @@ describe('GcsPersistorTests', function() { it('should copy the object', function() { expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) expect(GcsBucket.file).to.have.been.calledWith(key) - expect(GcsFile.copy).to.have.been.calledWith(DestinationFile) + expect(GcsFile.copy).to.have.been.calledWith(destinationFile) }) }) @@ -465,7 +465,7 @@ describe('GcsPersistorTests', function() { let error beforeEach(async function() { - GcsFile.copy = sinon.stub().rejects(NotFoundError) + GcsFile.copy = sinon.stub().rejects(GcsNotFoundError) try { await GcsPersistor.promises.copyFile(bucket, key, destKey) } catch (err) { @@ -496,7 +496,7 @@ describe('GcsPersistorTests', function() { let error beforeEach(async function() { - GcsFile.delete = sinon.stub().rejects(NotFoundError) + GcsFile.delete = sinon.stub().rejects(GcsNotFoundError) try { await GcsPersistor.promises.deleteFile(bucket, key) } catch (err) { From eb93ae4b10fb89b0d06eec196f1f132a67209aeb Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 5 Mar 2020 14:12:15 +0000 Subject: [PATCH 17/31] Use Bucket.deleteFiles to delete directory contents, instead of iterating --- services/filestore/app/js/GcsPersistor.js | 32 ++++------ .../test/unit/js/GcsPersistorTests.js | 60 +++---------------- 2 files changed, 17 insertions(+), 75 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index f4702e06a4..a2525361c2 100644 --- 
a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -191,41 +191,29 @@ async function deleteFile(bucketName, key) { } async function deleteDirectory(bucketName, key) { - let files + if (!key.match(/^[a-z0-9_-]+/i)) { + throw new NotFoundError({ + message: 'deleteDirectoryKey is invalid or missing', + info: { bucketName, key } + }) + } try { - const [response] = await storage + await storage .bucket(bucketName) - .getFiles({ directory: key }) - files = response + .deleteFiles({ directory: key, force: true }) } catch (err) { const error = PersistorHelper.wrapError( err, - 'failed to list objects in GCS', + 'failed to delete directory in GCS', { bucketName, key }, - ReadError + WriteError ) if (error instanceof NotFoundError) { return } throw error } - - for (const index in files) { - try { - await files[index].delete() - } catch (err) { - const error = PersistorHelper.wrapError( - err, - 'failed to delete object in GCS', - { bucketName, key }, - WriteError - ) - if (!(error instanceof NotFoundError)) { - throw error - } - } - } } async function directorySize(bucketName, key) { diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index ec071192e2..a63296a18f 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -84,7 +84,8 @@ describe('GcsPersistorTests', function() { GcsBucket = { file: sinon.stub().returns(GcsFile), - getFiles: sinon.stub().resolves([files]) + getFiles: sinon.stub().resolves([files]), + deleteFiles: sinon.stub().resolves() } Storage = class { @@ -516,67 +517,20 @@ describe('GcsPersistorTests', function() { return GcsPersistor.promises.deleteDirectory(bucket, key) }) - it('should list the objects in the directory', function() { + it('should delete the objects in the directory', function() { expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) - expect(GcsBucket.getFiles).to.have.been.calledWith({ - directory: key + expect(GcsBucket.deleteFiles).to.have.been.calledWith({ + directory: key, + force: true }) }) - - it('should delete the objects', function() { - expect(files[0].delete).to.have.been.called - expect(files[1].delete).to.have.been.called - }) - }) - - describe('when there are no files', function() { - beforeEach(async function() { - GcsBucket.getFiles.resolves([[]]) - return GcsPersistor.promises.deleteDirectory(bucket, key) - }) - - it('should list the objects in the directory', function() { - expect(GcsBucket.getFiles).to.have.been.calledWith({ - directory: key - }) - }) - - it('should not try to delete any objects', function() { - expect(files[0].delete).not.to.have.been.called - expect(files[1].delete).not.to.have.been.called - }) - }) - - describe('when there is an error listing the objects', function() { - let error - - beforeEach(async function() { - GcsBucket.getFiles = sinon.stub().rejects(genericError) - try { - await GcsPersistor.promises.deleteDirectory(bucket, key) - } catch (err) { - error = err - } - }) - - it('should generate a ReadError', function() { - expect(error).to.be.an.instanceOf(Errors.ReadError) - }) - - it('should wrap the error', function() { - expect(error.cause).to.equal(genericError) - }) - - it('should not try to delete any objects', function() { - expect(files[0].delete).not.to.have.been.called - }) }) describe('when there is an error deleting the objects', function() { let error beforeEach(async function() { - files[0].delete = 
sinon.stub().rejects(genericError) + GcsBucket.deleteFiles = sinon.stub().rejects(genericError) try { await GcsPersistor.promises.deleteDirectory(bucket, key) } catch (err) { From 2509b51883228b57f9875bc271048e366880d6fc Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Thu, 5 Mar 2020 17:23:47 +0000 Subject: [PATCH 18/31] Add optional gcs config to override gcs settings (for fake gcs server) --- services/filestore/config/settings.defaults.coffee | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 6ffe5a8523..0fe98effb1 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -34,6 +34,12 @@ settings = # gcs - Google Cloud Storage backend: process.env['BACKEND'] + gcs: + if process.env['GCS_API_ENDPOINT'] + apiEndpoint: process.env['GCS_API_ENDPOINT'] + apiScheme: process.env['GCS_API_SCHEME'] + projectId: process.env['GCS_PROJECT_ID'] + s3: if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? key: process.env['AWS_ACCESS_KEY_ID'] @@ -71,7 +77,7 @@ settings = sentry: dsn: process.env.SENTRY_DSN - + # Filestore health check # ---------------------- # Project and file details to check in persistor when calling /health_check From 28c3fe4a56e5d8203996d12ec9c61acdc867339e Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Tue, 10 Mar 2020 17:54:09 +0000 Subject: [PATCH 19/31] Validate key names when deleting directory with a configurable regex --- services/filestore/app/js/GcsPersistor.js | 2 +- .../filestore/config/settings.defaults.coffee | 2 + services/filestore/package-lock.json | 87 +++++++++++++++++++ services/filestore/package.json | 1 + .../test/acceptance/js/FilestoreTests.js | 53 ++++++----- .../test/acceptance/js/TestConfig.js | 3 +- .../test/unit/js/GcsPersistorTests.js | 28 +++++- 7 files changed, 144 insertions(+), 32 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index a2525361c2..3a314b50c1 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -191,7 +191,7 @@ async function deleteFile(bucketName, key) { } async function deleteDirectory(bucketName, key) { - if (!key.match(/^[a-z0-9_-]+/i)) { + if (!key.match(settings.filestore.gcs.directoryKeyRegex)) { throw new NotFoundError({ message: 'deleteDirectoryKey is invalid or missing', info: { bucketName, key } diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 0fe98effb1..7bb37db9de 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -39,6 +39,8 @@ settings = apiEndpoint: process.env['GCS_API_ENDPOINT'] apiScheme: process.env['GCS_API_SCHEME'] projectId: process.env['GCS_PROJECT_ID'] + # only keys that match this regex can be deleted + directoryKeyRegex: new RegExp(process.env['GCS_DIRECTORY_KEY_REGEX'] || "^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}") s3: if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? 
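[Editor's note] The directory-key validation added in PATCH 19/31 is easy to misread in diff form, so here is a minimal standalone sketch of how the guard behaves. It uses the default pattern from settings.defaults.coffee above; the first sample key is a hypothetical projectId/fileId ObjectId pair (not a value from the patch), while 'carbonara' is the rejected key used in the new unit test.

```js
// Standalone sketch (not part of the patch) of the deleteDirectory guard.
// Default pattern from settings.defaults.coffee: two 24-char hex ObjectIds.
const directoryKeyRegex = /^[0-9a-fA-F]{24}\/[0-9a-fA-F]{24}/

const candidates = [
  '5e5f19b8c8f2a13e1c2b4d6a/5e5f19c0c8f2a13e1c2b4d6b', // hypothetical projectId/fileId pair: allowed
  'carbonara', // free-form key, as in the new unit test: rejected
  '' // empty key: rejected, so a bad call cannot match every object in the bucket
]

for (const key of candidates) {
  // GcsPersistor throws a NotFoundError for keys that fail this test,
  // before any listing or deletion is attempted
  const verdict = directoryKeyRegex.test(key) ? 'deletable' : 'NotFoundError'
  console.log(`${key || '(empty)'} -> ${verdict}`)
}
```

Because the default regex only accepts keys that begin with a `projectId/fileId` pair of 24-character hex ids, this also explains why the acceptance tests in this patch switch from `Math.random()` identifiers to real `ObjectId()` strings.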
diff --git a/services/filestore/package-lock.json b/services/filestore/package-lock.json index 1d74c5d172..e858a9e0be 100644 --- a/services/filestore/package-lock.json +++ b/services/filestore/package-lock.json @@ -1415,6 +1415,16 @@ "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz", "integrity": "sha512-tbaUB1QpTIj4cKY8c1rvNAvEQXA+ekzHmbe4jzNfW3QWsF9GnnP/BRWyl6/qqS53heoYJ93naaFcm/jooONH8g==" }, + "bl": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-2.2.0.tgz", + "integrity": "sha512-wbgvOpqopSr7uq6fJrLH8EsvYMJf9gzfo2jCsL2eTy75qXPukA4pCgHamOQkZtY5vmfVtjB+P3LNlMHW5CEZXA==", + "dev": true, + "requires": { + "readable-stream": "^2.3.5", + "safe-buffer": "^5.1.1" + } + }, "body-parser": { "version": "1.19.0", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", @@ -1453,6 +1463,12 @@ "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", "dev": true }, + "bson": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/bson/-/bson-1.1.3.tgz", + "integrity": "sha512-TdiJxMVnodVS7r0BdL42y/pqC9cL2iKynVwA0Ho3qbsQYr428veL3l7BQyuqiw+Q5SqqoT0m4srSY/BlZ9AxXg==", + "dev": true + }, "buffer": { "version": "4.9.1", "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", @@ -1854,6 +1870,12 @@ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" }, + "denque": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/denque/-/denque-1.4.1.tgz", + "integrity": "sha512-OfzPuSZKGcgr96rf1oODnfjqBFmr1DVoc/TrItj3Ohe0Ah1C5WX5Baquw/9U9KovnQ88EqmJbD66rKYUQYN1tQ==", + "dev": true + }, "depd": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", @@ -3501,6 +3523,13 @@ "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==" }, + "memory-pager": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz", + "integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==", + "dev": true, + "optional": true + }, "merge-descriptors": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", @@ -3653,6 +3682,20 @@ "integrity": "sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg==", "optional": true }, + "mongodb": { + "version": "3.5.4", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.5.4.tgz", + "integrity": "sha512-xGH41Ig4dkSH5ROGezkgDbsgt/v5zbNUwE3TcFsSbDc6Qn3Qil17dhLsESSDDPTiyFDCPJRpfd4887dtsPgKtA==", + "dev": true, + "requires": { + "bl": "^2.2.0", + "bson": "^1.1.1", + "denque": "^1.4.1", + "require_optional": "^1.0.1", + "safe-buffer": "^5.1.2", + "saslprep": "^1.0.0" + } + }, "ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -4939,6 +4982,30 @@ "integrity": "sha512-AKGr4qvHiryxRb19m3PsLRGuKVAbJLUD7E6eOaHkfKhwc+vSgVOCY5xNvm9EkolBKTOf0GrQAZKLimOCz81Khg==", "dev": true }, + "require_optional": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz", + "integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==", + 
"dev": true, + "requires": { + "resolve-from": "^2.0.0", + "semver": "^5.1.0" + }, + "dependencies": { + "resolve-from": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz", + "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c=", + "dev": true + }, + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true + } + } + }, "resolve": { "version": "1.15.1", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.15.1.tgz", @@ -5041,6 +5108,16 @@ "stack-trace": "0.0.9" } }, + "saslprep": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/saslprep/-/saslprep-1.0.3.tgz", + "integrity": "sha512-/MY/PEMbk2SuY5sScONwhUDsV2p77Znkb/q3nSVstq/yQzYJOH/Azh29p9oJLsl3LnQwSvZDKagDGBsBwSooag==", + "dev": true, + "optional": true, + "requires": { + "sparse-bitfield": "^3.0.3" + } + }, "sax": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.1.tgz", @@ -5205,6 +5282,16 @@ "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" }, + "sparse-bitfield": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz", + "integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=", + "dev": true, + "optional": true, + "requires": { + "memory-pager": "^1.0.2" + } + }, "spdx-correct": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.0.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index 6f7d84c778..bbdd586f58 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -56,6 +56,7 @@ "eslint-plugin-promise": "^4.2.1", "eslint-plugin-standard": "^4.0.1", "mocha": "5.2.0", + "mongodb": "^3.5.4", "prettier-eslint": "^9.0.1", "prettier-eslint-cli": "^5.0.0", "sandboxed-module": "2.0.3", diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index 599d60155a..9da46db092 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -14,6 +14,7 @@ const { promisify } = require('util') const { Storage } = require('@google-cloud/storage') const streamifier = require('streamifier') chai.use(require('chai-as-promised')) +const { ObjectId } = require('mongodb') const fsWriteFile = promisify(fs.writeFile) const fsStat = promisify(fs.stat) @@ -48,7 +49,6 @@ const BackendSettings = require('./TestConfig') describe('Filestore', function() { this.timeout(1000 * 10) const filestoreUrl = `http://localhost:${Settings.internal.filestore.port}` - const directoryName = 'directory' // redefine the test suite for every available backend Object.keys(BackendSettings).forEach(backend => { @@ -84,7 +84,7 @@ describe('Filestore', function() { `${metricPrefix}_egress` ) } - projectId = `acceptance_tests_${Math.random()}` + projectId = ObjectId().toString() }) it('should send a 200 for the status endpoint', async function() { @@ -107,8 +107,8 @@ describe('Filestore', function() { '/tmp/filestore_acceptance_tests_file_read.txt' beforeEach(async function() { - fileId = Math.random() - fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + fileId = 
ObjectId().toString() + fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` constantFileContent = [ 'hello world', `line 2 goes here ${Math.random()}`, @@ -188,16 +188,16 @@ describe('Filestore', function() { }) it('should be able to copy files', async function() { - const newProjectID = `acceptance_tests_copied_project_${Math.random()}` - const newFileId = Math.random() - const newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` + const newProjectID = ObjectId().toString() + const newFileId = ObjectId().toString() + const newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${newFileId}` const opts = { method: 'put', uri: newFileUrl, json: { source: { project_id: projectId, - file_id: `${directoryName}/${fileId}` + file_id: fileId } } } @@ -260,7 +260,6 @@ describe('Filestore', function() { describe('with multiple files', function() { let fileIds, fileUrls - const directoryName = 'directory' const localFileReadPaths = [ '/tmp/filestore_acceptance_tests_file_read_1.txt', '/tmp/filestore_acceptance_tests_file_read_2.txt' @@ -286,10 +285,10 @@ describe('Filestore', function() { }) beforeEach(async function() { - fileIds = [Math.random(), Math.random()] + fileIds = [ObjectId().toString(), ObjectId().toString()] fileUrls = [ - `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[0]}`, - `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[1]}` + `${filestoreUrl}/project/${projectId}/file/${fileIds[0]}`, + `${filestoreUrl}/project/${projectId}/file/${fileIds[1]}` ] const writeStreams = [ @@ -325,8 +324,8 @@ describe('Filestore', function() { let fileId, fileUrl, largeFileContent, error beforeEach(async function() { - fileId = Math.random() - fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + fileId = ObjectId().toString() + fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` largeFileContent = '_wombat_'.repeat(1024 * 1024) // 8 megabytes largeFileContent += Math.random() @@ -359,8 +358,8 @@ describe('Filestore', function() { beforeEach(async function() { constantFileContent = `This is a file in a different S3 bucket ${Math.random()}` - fileId = Math.random().toString() - bucketName = Math.random().toString() + fileId = ObjectId().toString() + bucketName = ObjectId().toString() fileUrl = `${filestoreUrl}/bucket/${bucketName}/key/${fileId}` const s3ClientSettings = { @@ -448,9 +447,9 @@ describe('Filestore', function() { beforeEach(function() { constantFileContent = `This is yet more file content ${Math.random()}` - fileId = Math.random().toString() - fileKey = `${projectId}/${directoryName}/${fileId}` - fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + fileId = ObjectId().toString() + fileKey = `${projectId}/${fileId}` + fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` bucket = Settings.filestore.stores.user_files fallbackBucket = Settings.filestore.fallback.buckets[bucket] @@ -532,10 +531,10 @@ describe('Filestore', function() { let newFileId, newFileUrl, newFileKey, opts beforeEach(function() { - const newProjectID = `acceptance_tests_copied_project_${Math.random()}` - newFileId = Math.random() - newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` - newFileKey = `${newProjectID}/${directoryName}/${newFileId}` + const newProjectID = ObjectId().toString() + newFileId = ObjectId().toString() + newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${newFileId}` + 
newFileKey = `${newProjectID}/${newFileId}` opts = { method: 'put', @@ -543,7 +542,7 @@ describe('Filestore', function() { json: { source: { project_id: projectId, - file_id: `${directoryName}/${fileId}` + file_id: fileId } } } @@ -668,7 +667,7 @@ describe('Filestore', function() { await expectPersistorNotToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, - `${projectId}/${directoryName}/${fileId}` + `${projectId}/${fileId}` ) }) }) @@ -757,8 +756,8 @@ describe('Filestore', function() { ) beforeEach(async function() { - fileId = Math.random() - fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + fileId = ObjectId().toString() + fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` const stat = await fsStat(localFileReadPath) localFileSize = stat.size const writeStream = request.post(fileUrl) diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js index fd7d0f034c..833a3b09be 100644 --- a/services/filestore/test/acceptance/js/TestConfig.js +++ b/services/filestore/test/acceptance/js/TestConfig.js @@ -23,7 +23,8 @@ function gcsConfig() { return { apiEndpoint: process.env.GCS_API_ENDPOINT, apiScheme: process.env.GCS_API_SCHEME, - projectId: 'fake' + projectId: 'fake', + directoryKeyRegex: new RegExp('^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}') } } diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index a63296a18f..0e5f77bdf1 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -3,6 +3,7 @@ const chai = require('chai') const { expect } = chai const modulePath = '../../../app/js/GcsPersistor.js' const SandboxedModule = require('sandboxed-module') +const { ObjectId } = require('mongodb') const Errors = require('../../../app/js/Errors') @@ -41,6 +42,9 @@ describe('GcsPersistorTests', function() { backend: 'gcs', stores: { user_files: 'user_files' + }, + gcs: { + directoryKeyRegex: /^[0-9a-fA-F]{24}\/[0-9a-fA-F]{24}/ } } } @@ -512,15 +516,17 @@ describe('GcsPersistorTests', function() { }) describe('deleteDirectory', function() { + const directoryName = `${ObjectId()}/${ObjectId()}` describe('with valid parameters', function() { beforeEach(async function() { - return GcsPersistor.promises.deleteDirectory(bucket, key) + console.log(key) + return GcsPersistor.promises.deleteDirectory(bucket, directoryName) }) it('should delete the objects in the directory', function() { expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) expect(GcsBucket.deleteFiles).to.have.been.calledWith({ - directory: key, + directory: directoryName, force: true }) }) @@ -532,7 +538,7 @@ describe('GcsPersistorTests', function() { beforeEach(async function() { GcsBucket.deleteFiles = sinon.stub().rejects(genericError) try { - await GcsPersistor.promises.deleteDirectory(bucket, key) + await GcsPersistor.promises.deleteDirectory(bucket, directoryName) } catch (err) { error = err } @@ -546,6 +552,22 @@ describe('GcsPersistorTests', function() { expect(error.cause).to.equal(genericError) }) }) + + describe('when the directory name is in the wrong format', function() { + let error + + beforeEach(async function() { + try { + await GcsPersistor.promises.deleteDirectory(bucket, 'carbonara') + } catch (err) { + error = err + } + }) + + it('should throw a NotFoundError', function() { + expect(error).to.be.an.instanceOf(Errors.NotFoundError) + }) + }) }) describe('directorySize', 
function() { From 183cb0179a18514e6c6af7baa316c1438c79f23e Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Fri, 13 Mar 2020 16:14:06 +0000 Subject: [PATCH 20/31] Add support to GCS persistor for unlocking files and copying on delete --- services/filestore/app/js/GcsPersistor.js | 40 +++-- services/filestore/app/js/PersistorHelper.js | 3 +- .../filestore/config/settings.defaults.coffee | 15 +- .../test/acceptance/js/FilestoreTests.js | 168 +++++++++--------- .../test/acceptance/js/TestConfig.js | 12 +- .../test/acceptance/js/TestHelper.js | 54 ++++++ .../test/unit/js/GcsPersistorTests.js | 18 +- 7 files changed, 193 insertions(+), 117 deletions(-) create mode 100644 services/filestore/test/acceptance/js/TestHelper.js diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 3a314b50c1..399ae68064 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -9,18 +9,21 @@ const PersistorHelper = require('./PersistorHelper') const pipeline = promisify(Stream.pipeline) -// both of these settings will be null by default except for tests +// endpoint settings will be null by default except for tests // that's OK - GCS uses the locally-configured service account by default -const storage = new Storage(settings.filestore.gcs) +const storage = new Storage(settings.filestore.gcs.endpoint) // workaround for broken uploads with custom endpoints: // https://github.com/googleapis/nodejs-storage/issues/898 -if (settings.filestore.gcs && settings.filestore.gcs.apiEndpoint) { +if ( + settings.filestore.gcs.endpoint && + settings.filestore.gcs.endpoint.apiEndpoint +) { storage.interceptors.push({ request: function(reqOpts) { const url = new URL(reqOpts.uri) - url.host = settings.filestore.gcs.apiEndpoint - if (settings.filestore.gcs.apiScheme) { - url.protocol = settings.filestore.gcs.apiScheme + url.host = settings.filestore.gcs.endpoint.apiEndpoint + if (settings.filestore.gcs.endpoint.apiScheme) { + url.protocol = settings.filestore.gcs.endpoint.apiScheme } reqOpts.uri = url.toString() return reqOpts @@ -173,10 +176,19 @@ async function getFileMd5Hash(bucketName, key) { async function deleteFile(bucketName, key) { try { - await storage - .bucket(bucketName) - .file(key) - .delete() + const file = storage.bucket(bucketName).file(key) + + if (settings.filestore.gcs.unlockBeforeDelete) { + await file.setMetadata({ eventBasedHold: false }) + } + if (settings.filestore.gcs.deletedBucketSuffix) { + await file.copy( + storage.bucket( + `${bucketName}${settings.filestore.gcs.deletedBucketSuffix}` + ) + ) + } + await file.delete() } catch (err) { const error = PersistorHelper.wrapError( err, @@ -199,9 +211,13 @@ async function deleteDirectory(bucketName, key) { } try { - await storage + const [files] = await storage .bucket(bucketName) - .deleteFiles({ directory: key, force: true }) + .getFiles({ directory: key }) + + for (const file of files) { + await deleteFile(bucketName, file.name) + } } catch (err) { const error = PersistorHelper.wrapError( err, diff --git a/services/filestore/app/js/PersistorHelper.js b/services/filestore/app/js/PersistorHelper.js index a19311e889..a58f024bb4 100644 --- a/services/filestore/app/js/PersistorHelper.js +++ b/services/filestore/app/js/PersistorHelper.js @@ -93,7 +93,8 @@ function wrapError(error, message, params, ErrorType) { error instanceof NotFoundError || ['NoSuchKey', 'NotFound', 404, 'AccessDenied', 'ENOENT'].includes( error.code - ) + ) || + (error.response && 
error.response.statusCode === 404) ) { return new NotFoundError({ message: 'no such file', diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 7bb37db9de..24bce087ff 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -35,12 +35,15 @@ settings = backend: process.env['BACKEND'] gcs: - if process.env['GCS_API_ENDPOINT'] - apiEndpoint: process.env['GCS_API_ENDPOINT'] - apiScheme: process.env['GCS_API_SCHEME'] - projectId: process.env['GCS_PROJECT_ID'] - # only keys that match this regex can be deleted - directoryKeyRegex: new RegExp(process.env['GCS_DIRECTORY_KEY_REGEX'] || "^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}") + endpoint: + if process.env['GCS_API_ENDPOINT'] + apiEndpoint: process.env['GCS_API_ENDPOINT'] + apiScheme: process.env['GCS_API_SCHEME'] + projectId: process.env['GCS_PROJECT_ID'] + # only keys that match this regex can be deleted + directoryKeyRegex: new RegExp(process.env['GCS_DIRECTORY_KEY_REGEX'] || "^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}") + unlockBeforeDelete: process.env['GCS_UNLOCK_BEFORE_DELETE'] == "true" # unlock an event-based hold before deleting. default false + deletedBucketSuffix: process.env['GCS_DELETED_BUCKET_SUFFIX'] # if present, copy file to another bucket on delete. default null s3: if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index 9da46db092..0aa2c8e294 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -4,6 +4,7 @@ const fs = require('fs') const Settings = require('settings-sharelatex') const Path = require('path') const FilestoreApp = require('./FilestoreApp') +const TestHelper = require('./TestHelper') const rp = require('request-promise-native').defaults({ resolveWithFullResponse: true }) @@ -20,28 +21,10 @@ const fsWriteFile = promisify(fs.writeFile) const fsStat = promisify(fs.stat) const pipeline = promisify(Stream.pipeline) -async function getMetric(filestoreUrl, metric) { - const res = await rp.get(`${filestoreUrl}/metrics`) - expect(res.statusCode).to.equal(200) - const metricRegex = new RegExp(`^${metric}{[^}]+} ([0-9]+)$`, 'm') - const found = metricRegex.exec(res.body) - return parseInt(found ? found[1] : 0) || 0 -} - if (!process.env.AWS_ACCESS_KEY_ID) { throw new Error('please provide credentials for the AWS S3 test server') } -function streamToString(stream) { - const chunks = [] - return new Promise((resolve, reject) => { - stream.on('data', chunk => chunks.push(chunk)) - stream.on('error', reject) - stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))) - stream.resume() - }) -} - // store settings for multiple backends, so that we can test each one. 
// fs will always be available - add others if they are configured const BackendSettings = require('./TestConfig') @@ -64,10 +47,19 @@ describe('Filestore', function() { if (BackendSettings[backend].gcs) { before(async function() { - const storage = new Storage(Settings.filestore.gcs) + const storage = new Storage(Settings.filestore.gcs.endpoint) await storage.createBucket(process.env.GCS_USER_FILES_BUCKET_NAME) await storage.createBucket(process.env.GCS_PUBLIC_FILES_BUCKET_NAME) await storage.createBucket(process.env.GCS_TEMPLATE_FILES_BUCKET_NAME) + await storage.createBucket( + `${process.env.GCS_USER_FILES_BUCKET_NAME}-deleted` + ) + await storage.createBucket( + `${process.env.GCS_PUBLIC_FILES_BUCKET_NAME}-deleted` + ) + await storage.createBucket( + `${process.env.GCS_TEMPLATE_FILES_BUCKET_NAME}-deleted` + ) }) } @@ -79,7 +71,7 @@ describe('Filestore', function() { // retrieve previous metrics from the app if (['s3', 'gcs'].includes(Settings.filestore.backend)) { metricPrefix = Settings.filestore.backend - previousEgress = await getMetric( + previousEgress = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_egress` ) @@ -129,7 +121,7 @@ describe('Filestore', function() { // The content hash validation might require a full download // in case the ETag field of the upload response is not a md5 sum. if (['s3', 'gcs'].includes(Settings.filestore.backend)) { - previousIngress = await getMetric( + previousIngress = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_ingress` ) @@ -223,7 +215,7 @@ describe('Filestore', function() { if (['S3Persistor', 'GcsPersistor'].includes(backend)) { it('should record an egress metric for the upload', async function() { - const metric = await getMetric( + const metric = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_egress` ) @@ -232,7 +224,7 @@ describe('Filestore', function() { it('should record an ingress metric when downloading the file', async function() { await rp.get(fileUrl) - const metric = await getMetric( + const metric = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_ingress` ) @@ -249,7 +241,7 @@ describe('Filestore', function() { } } await rp.get(options) - const metric = await getMetric( + const metric = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_ingress` ) @@ -394,50 +386,54 @@ describe('Filestore', function() { }) } + if (backend === 'GcsPersistor') { + describe('when deleting a file in GCS', function() { + let fileId, fileUrl, content, error + + beforeEach(async function() { + fileId = ObjectId() + fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` + + content = '_wombat_' + Math.random() + + const writeStream = request.post(fileUrl) + const readStream = streamifier.createReadStream(content) + // hack to consume the result to ensure the http request has been fully processed + const resultStream = fs.createWriteStream('/dev/null') + + try { + await pipeline(readStream, writeStream, resultStream) + await rp.delete(fileUrl) + } catch (err) { + error = err + } + }) + + it('should not throw an error', function() { + expect(error).not.to.exist + }) + + it('should copy the file to the deleted-files bucket', async function() { + await TestHelper.expectPersistorToHaveFile( + app.persistor, + `${Settings.filestore.stores.user_files}-deleted`, + `${projectId}/${fileId}`, + content + ) + }) + + it('should remove the file from the original bucket', async function() { + await TestHelper.expectPersistorNotToHaveFile( + app.persistor, + Settings.filestore.stores.user_files, + 
`${projectId}/${fileId}` + ) + }) + }) + } + if (BackendSettings[backend].fallback) { describe('with a fallback', function() { - async function uploadStringToPersistor( - persistor, - bucket, - key, - content - ) { - const fileStream = streamifier.createReadStream(content) - await persistor.promises.sendStream(bucket, key, fileStream) - } - - async function getStringFromPersistor(persistor, bucket, key) { - const stream = await persistor.promises.getFileStream( - bucket, - key, - {} - ) - return streamToString(stream) - } - - async function expectPersistorToHaveFile( - persistor, - bucket, - key, - content - ) { - const foundContent = await getStringFromPersistor( - persistor, - bucket, - key - ) - expect(foundContent).to.equal(content) - } - - async function expectPersistorNotToHaveFile(persistor, bucket, key) { - await expect( - getStringFromPersistor(persistor, bucket, key) - ).to.eventually.have.been.rejected.with.property( - 'name', - 'NotFoundError' - ) - } - let constantFileContent, fileId, fileKey, @@ -457,7 +453,7 @@ describe('Filestore', function() { describe('with a file in the fallback bucket', function() { beforeEach(async function() { - await uploadStringToPersistor( + await TestHelper.uploadStringToPersistor( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -466,7 +462,7 @@ describe('Filestore', function() { }) it('should not find file in the primary', async function() { - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.primaryPersistor, bucket, fileKey @@ -474,7 +470,7 @@ describe('Filestore', function() { }) it('should find the file in the fallback', async function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -495,7 +491,7 @@ describe('Filestore', function() { it('should not copy the file to the primary', async function() { await rp.get(fileUrl) - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.primaryPersistor, bucket, fileKey @@ -518,7 +514,7 @@ describe('Filestore', function() { // wait for the file to copy in the background await promisify(setTimeout)(1000) - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.primaryPersistor, bucket, fileKey, @@ -557,7 +553,7 @@ describe('Filestore', function() { }) it('should leave the old file in the old bucket', async function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -566,7 +562,7 @@ describe('Filestore', function() { }) it('should not create a new file in the old bucket', async function() { - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, newFileKey @@ -574,7 +570,7 @@ describe('Filestore', function() { }) it('should create a new file in the new bucket', async function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.primaryPersistor, bucket, newFileKey, @@ -586,7 +582,7 @@ describe('Filestore', function() { // wait for the file to copy in the background await promisify(setTimeout)(1000) - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.primaryPersistor, bucket, fileKey @@ -603,7 +599,7 @@ describe('Filestore', function() { }) it('should leave the old file in the old bucket', async 
function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -612,7 +608,7 @@ describe('Filestore', function() { }) it('should not create a new file in the old bucket', async function() { - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, newFileKey @@ -620,7 +616,7 @@ describe('Filestore', function() { }) it('should create a new file in the new bucket', async function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.primaryPersistor, bucket, newFileKey, @@ -632,7 +628,7 @@ describe('Filestore', function() { // wait for the file to copy in the background await promisify(setTimeout)(1000) - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.primaryPersistor, bucket, fileKey, @@ -655,7 +651,7 @@ describe('Filestore', function() { }) it('should store the file on the primary', async function() { - await expectPersistorToHaveFile( + await TestHelper.expectPersistorToHaveFile( app.persistor.primaryPersistor, bucket, fileKey, @@ -664,7 +660,7 @@ describe('Filestore', function() { }) it('should not store the file on the fallback', async function() { - await expectPersistorNotToHaveFile( + await TestHelper.expectPersistorNotToHaveFile( app.persistor.fallbackPersistor, fallbackBucket, `${projectId}/${fileId}` @@ -675,7 +671,7 @@ describe('Filestore', function() { describe('when deleting a file', function() { describe('when the file exists on the primary', function() { beforeEach(async function() { - await uploadStringToPersistor( + await TestHelper.uploadStringToPersistor( app.persistor.primaryPersistor, bucket, fileKey, @@ -694,7 +690,7 @@ describe('Filestore', function() { describe('when the file exists on the fallback', function() { beforeEach(async function() { - await uploadStringToPersistor( + await TestHelper.uploadStringToPersistor( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -713,13 +709,13 @@ describe('Filestore', function() { describe('when the file exists on both the primary and the fallback', function() { beforeEach(async function() { - await uploadStringToPersistor( + await TestHelper.uploadStringToPersistor( app.persistor.primaryPersistor, bucket, fileKey, constantFileContent ) - await uploadStringToPersistor( + await TestHelper.uploadStringToPersistor( app.persistor.fallbackPersistor, fallbackBucket, fileKey, @@ -773,7 +769,7 @@ describe('Filestore', function() { if (['S3Persistor', 'GcsPersistor'].includes(backend)) { it('should record an egress metric for the upload', async function() { - const metric = await getMetric( + const metric = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_egress` ) diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js index 833a3b09be..ec80e45c1f 100644 --- a/services/filestore/test/acceptance/js/TestConfig.js +++ b/services/filestore/test/acceptance/js/TestConfig.js @@ -21,10 +21,14 @@ function s3Stores() { function gcsConfig() { return { - apiEndpoint: process.env.GCS_API_ENDPOINT, - apiScheme: process.env.GCS_API_SCHEME, - projectId: 'fake', - directoryKeyRegex: new RegExp('^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}') + endpoint: { + apiEndpoint: process.env.GCS_API_ENDPOINT, + apiScheme: process.env.GCS_API_SCHEME, + projectId: 'fake' + }, + directoryKeyRegex: new 
RegExp('^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}'), + unlockBeforeDelete: false, // fake-gcs does not support this + deletedBucketSuffix: '-deleted' } } diff --git a/services/filestore/test/acceptance/js/TestHelper.js b/services/filestore/test/acceptance/js/TestHelper.js new file mode 100644 index 0000000000..df57303de1 --- /dev/null +++ b/services/filestore/test/acceptance/js/TestHelper.js @@ -0,0 +1,54 @@ +const streamifier = require('streamifier') +const rp = require('request-promise-native').defaults({ + resolveWithFullResponse: true +}) + +const { expect } = require('chai') + +module.exports = { + uploadStringToPersistor, + getStringFromPersistor, + expectPersistorToHaveFile, + expectPersistorNotToHaveFile, + streamToString, + getMetric +} + +async function getMetric(filestoreUrl, metric) { + const res = await rp.get(`${filestoreUrl}/metrics`) + expect(res.statusCode).to.equal(200) + const metricRegex = new RegExp(`^${metric}{[^}]+} ([0-9]+)$`, 'm') + const found = metricRegex.exec(res.body) + return parseInt(found ? found[1] : 0) || 0 +} + +function streamToString(stream) { + const chunks = [] + return new Promise((resolve, reject) => { + stream.on('data', chunk => chunks.push(chunk)) + stream.on('error', reject) + stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))) + stream.resume() + }) +} + +async function uploadStringToPersistor(persistor, bucket, key, content) { + const fileStream = streamifier.createReadStream(content) + await persistor.promises.sendStream(bucket, key, fileStream) +} + +async function getStringFromPersistor(persistor, bucket, key) { + const stream = await persistor.promises.getFileStream(bucket, key, {}) + return streamToString(stream) +} + +async function expectPersistorToHaveFile(persistor, bucket, key, content) { + const foundContent = await getStringFromPersistor(persistor, bucket, key) + expect(foundContent).to.equal(content) +} + +async function expectPersistorNotToHaveFile(persistor, bucket, key) { + await expect( + getStringFromPersistor(persistor, bucket, key) + ).to.eventually.have.been.rejected.with.property('name', 'NotFoundError') +} diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index 0e5f77bdf1..8a9df40485 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -88,8 +88,7 @@ describe('GcsPersistorTests', function() { GcsBucket = { file: sinon.stub().returns(GcsFile), - getFiles: sinon.stub().resolves([files]), - deleteFiles: sinon.stub().resolves() + getFiles: sinon.stub().resolves([files]) } Storage = class { @@ -523,20 +522,23 @@ describe('GcsPersistorTests', function() { return GcsPersistor.promises.deleteDirectory(bucket, directoryName) }) - it('should delete the objects in the directory', function() { + it('should list the objects in the directory', function() { expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) - expect(GcsBucket.deleteFiles).to.have.been.calledWith({ - directory: directoryName, - force: true + expect(GcsBucket.getFiles).to.have.been.calledWith({ + directory: directoryName }) }) + + it('should delete the files', function() { + expect(GcsFile.delete).to.have.been.calledTwice + }) }) - describe('when there is an error deleting the objects', function() { + describe('when there is an error listing the objects', function() { let error beforeEach(async function() { - GcsBucket.deleteFiles = sinon.stub().rejects(genericError) + GcsBucket.getFiles = 
sinon.stub().rejects(genericError) try { await GcsPersistor.promises.deleteDirectory(bucket, directoryName) } catch (err) { From edf1ce1f7e91b6d497f1c340847ea55c447d84c3 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Sat, 14 Mar 2020 14:02:58 +0000 Subject: [PATCH 21/31] Delete files from a directory in parallel --- services/filestore/app/js/GcsPersistor.js | 5 +++-- services/filestore/package-lock.json | 21 +++++++++++++++++++ services/filestore/package.json | 3 ++- .../test/unit/js/GcsPersistorTests.js | 2 ++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 399ae68064..d8639e70d4 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -5,6 +5,7 @@ const Stream = require('stream') const { Storage } = require('@google-cloud/storage') const { callbackify } = require('util') const { WriteError, ReadError, NotFoundError } = require('./Errors') +const asyncPool = require('tiny-async-pool') const PersistorHelper = require('./PersistorHelper') const pipeline = promisify(Stream.pipeline) @@ -215,9 +216,9 @@ async function deleteDirectory(bucketName, key) { .bucket(bucketName) .getFiles({ directory: key }) - for (const file of files) { + await asyncPool(50, files, async file => { await deleteFile(bucketName, file.name) - } + }) } catch (err) { const error = PersistorHelper.wrapError( err, diff --git a/services/filestore/package-lock.json b/services/filestore/package-lock.json index e858a9e0be..f9d1b1696b 100644 --- a/services/filestore/package-lock.json +++ b/services/filestore/package-lock.json @@ -5582,6 +5582,22 @@ "readable-stream": "2 || 3" } }, + "tiny-async-pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/tiny-async-pool/-/tiny-async-pool-1.1.0.tgz", + "integrity": "sha512-jIglyHF/9QdCC3662m/UMVADE6SlocBDpXdFLMZyiAfrw8MSG1pml7lwRtBMT6L/z4dddAxfzw2lpW2Vm42fyQ==", + "requires": { + "semver": "^5.5.0", + "yaassertion": "^1.0.0" + }, + "dependencies": { + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==" + } + } + }, "tmp": { "version": "0.0.33", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", @@ -5980,6 +5996,11 @@ "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==", "dev": true }, + "yaassertion": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/yaassertion/-/yaassertion-1.0.2.tgz", + "integrity": "sha512-sBoJBg5vTr3lOpRX0yFD+tz7wv/l2UPMFthag4HGTMPrypBRKerjjS8jiEnNMjcAEtPXjbHiKE0UwRR1W1GXBg==" + }, "yallist": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index bbdd586f58..73459bac71 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -36,7 +36,8 @@ "rimraf": "2.2.8", "settings-sharelatex": "^1.1.0", "stream-buffers": "~0.2.5", - "stream-meter": "^1.0.4" + "stream-meter": "^1.0.4", + "tiny-async-pool": "^1.1.0" }, "devDependencies": { "babel-eslint": "^10.0.3", diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index 8a9df40485..f02589c389 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ 
b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -4,6 +4,7 @@ const { expect } = chai const modulePath = '../../../app/js/GcsPersistor.js' const SandboxedModule = require('sandboxed-module') const { ObjectId } = require('mongodb') +const asyncPool = require('tiny-async-pool') const Errors = require('../../../app/js/Errors') @@ -135,6 +136,7 @@ describe('GcsPersistorTests', function() { '@google-cloud/storage': { Storage }, 'settings-sharelatex': settings, 'logger-sharelatex': Logger, + 'tiny-async-pool': asyncPool, './Errors': Errors, fs: Fs, 'stream-meter': Meter, From 58db14456a02745fb13fc6a6f8182d9644da7d13 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Sat, 14 Mar 2020 14:11:17 +0000 Subject: [PATCH 22/31] Add timestamp to files in deleted bucket --- services/filestore/app/js/GcsPersistor.js | 6 +++--- services/filestore/package-lock.json | 6 ++++++ services/filestore/package.json | 3 ++- .../filestore/test/acceptance/js/FilestoreTests.js | 11 +++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index d8639e70d4..29286ef505 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -184,9 +184,9 @@ async function deleteFile(bucketName, key) { } if (settings.filestore.gcs.deletedBucketSuffix) { await file.copy( - storage.bucket( - `${bucketName}${settings.filestore.gcs.deletedBucketSuffix}` - ) + storage + .bucket(`${bucketName}${settings.filestore.gcs.deletedBucketSuffix}`) + .file(`${key}-${new Date()}`) ) } await file.delete() diff --git a/services/filestore/package-lock.json b/services/filestore/package-lock.json index f9d1b1696b..7c21d9e128 100644 --- a/services/filestore/package-lock.json +++ b/services/filestore/package-lock.json @@ -5582,6 +5582,12 @@ "readable-stream": "2 || 3" } }, + "timekeeper": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/timekeeper/-/timekeeper-2.2.0.tgz", + "integrity": "sha512-W3AmPTJWZkRwu+iSNxPIsLZ2ByADsOLbbLxe46UJyWj3mlYLlwucKiq+/dPm0l9wTzqoF3/2PH0AGFCebjq23A==", + "dev": true + }, "tiny-async-pool": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/tiny-async-pool/-/tiny-async-pool-1.1.0.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index 73459bac71..51530e86b4 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -63,6 +63,7 @@ "sandboxed-module": "2.0.3", "sinon": "7.1.1", "sinon-chai": "^3.3.0", - "streamifier": "^0.1.1" + "streamifier": "^0.1.1", + "timekeeper": "^2.2.0" } } diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index 0aa2c8e294..8369698891 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -16,6 +16,7 @@ const { Storage } = require('@google-cloud/storage') const streamifier = require('streamifier') chai.use(require('chai-as-promised')) const { ObjectId } = require('mongodb') +const tk = require('timekeeper') const fsWriteFile = promisify(fs.writeFile) const fsStat = promisify(fs.stat) @@ -388,9 +389,11 @@ describe('Filestore', function() { if (backend === 'GcsPersistor') { describe('when deleting a file in GCS', function() { - let fileId, fileUrl, content, error + let fileId, fileUrl, content, error, date beforeEach(async function() { + date = new Date() + tk.freeze(date) fileId = ObjectId() fileUrl = 
`${filestoreUrl}/project/${projectId}/file/${fileId}` @@ -409,6 +412,10 @@ describe('Filestore', function() { } }) + afterEach(function() { + tk.reset() + }) + it('should not throw an error', function() { expect(error).not.to.exist }) @@ -417,7 +424,7 @@ describe('Filestore', function() { await TestHelper.expectPersistorToHaveFile( app.persistor, `${Settings.filestore.stores.user_files}-deleted`, - `${projectId}/${fileId}`, + `${projectId}/${fileId}-${date}`, content ) }) From fc80aa3954e6823c7970adf63e4bced6594cf97f Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Sat, 14 Mar 2020 14:31:30 +0000 Subject: [PATCH 23/31] Move directory key validation into FileHandler --- services/filestore/app/js/FileHandler.js | 14 ++++++++++++- services/filestore/app/js/GcsPersistor.js | 7 ------- .../filestore/config/settings.defaults.coffee | 2 -- .../test/unit/js/FileHandlerTests.js | 21 +++++++++++++++++-- .../test/unit/js/GcsPersistorTests.js | 16 -------------- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/services/filestore/app/js/FileHandler.js b/services/filestore/app/js/FileHandler.js index 02831fa3d0..9b592df34e 100644 --- a/services/filestore/app/js/FileHandler.js +++ b/services/filestore/app/js/FileHandler.js @@ -5,7 +5,7 @@ const LocalFileWriter = require('./LocalFileWriter') const FileConverter = require('./FileConverter') const KeyBuilder = require('./KeyBuilder') const ImageOptimiser = require('./ImageOptimiser') -const { ConversionError } = require('./Errors') +const { ConversionError, WriteError } = require('./Errors') module.exports = { insertFile: callbackify(insertFile), @@ -24,12 +24,24 @@ module.exports = { async function insertFile(bucket, key, stream) { const convertedKey = KeyBuilder.getConvertedFolderKey(key) + if (!convertedKey.match(/^[0-9a-f]{24}\/[0-9a-f]{24}/i)) { + throw new WriteError({ + message: 'key does not match validation regex', + info: { bucket, key, convertedKey } + }) + } await PersistorManager.promises.deleteDirectory(bucket, convertedKey) await PersistorManager.promises.sendStream(bucket, key, stream) } async function deleteFile(bucket, key) { const convertedKey = KeyBuilder.getConvertedFolderKey(key) + if (!convertedKey.match(/^[0-9a-f]{24}\/[0-9a-f]{24}/i)) { + throw new WriteError({ + message: 'key does not match validation regex', + info: { bucket, key, convertedKey } + }) + } await Promise.all([ PersistorManager.promises.deleteFile(bucket, key), PersistorManager.promises.deleteDirectory(bucket, convertedKey) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 29286ef505..bc46153983 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -204,13 +204,6 @@ async function deleteFile(bucketName, key) { } async function deleteDirectory(bucketName, key) { - if (!key.match(settings.filestore.gcs.directoryKeyRegex)) { - throw new NotFoundError({ - message: 'deleteDirectoryKey is invalid or missing', - info: { bucketName, key } - }) - } - try { const [files] = await storage .bucket(bucketName) diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 24bce087ff..6b5238e552 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -40,8 +40,6 @@ settings = apiEndpoint: process.env['GCS_API_ENDPOINT'] apiScheme: process.env['GCS_API_SCHEME'] projectId: process.env['GCS_PROJECT_ID'] - # only keys that match this 
regex can be deleted - directoryKeyRegex: new RegExp(process.env['GCS_DIRECTORY_KEY_REGEX'] || "^[0-9a-fA-F]{24}/[0-9a-fA-F]{24}") unlockBeforeDelete: process.env['GCS_UNLOCK_BEFORE_DELETE'] == "true" # unlock an event-based hold before deleting. default false deletedBucketSuffix: process.env['GCS_DELETED_BUCKET_SUFFIX'] # if present, copy file to another bucket on delete. default null diff --git a/services/filestore/test/unit/js/FileHandlerTests.js b/services/filestore/test/unit/js/FileHandlerTests.js index 623ed440b0..9692521531 100644 --- a/services/filestore/test/unit/js/FileHandlerTests.js +++ b/services/filestore/test/unit/js/FileHandlerTests.js @@ -3,6 +3,7 @@ const chai = require('chai') const { expect } = chai const modulePath = '../../../app/js/FileHandler.js' const SandboxedModule = require('sandboxed-module') +const { ObjectId } = require('mongodb') chai.use(require('sinon-chai')) chai.use(require('chai-as-promised')) @@ -24,8 +25,8 @@ describe('FileHandler', function() { } const bucket = 'my_bucket' - const key = 'key/here' - const convertedFolderKey = 'convertedFolder' + const key = `${ObjectId()}/${ObjectId()}` + const convertedFolderKey = `${ObjectId()}/${ObjectId()}` const sourceStream = 'sourceStream' const convertedKey = 'convertedKey' const readStream = { @@ -112,6 +113,14 @@ describe('FileHandler', function() { done() }) }) + + it('should throw an error when the key is in the wrong format', function(done) { + KeyBuilder.getConvertedFolderKey.returns('wombat') + FileHandler.insertFile(bucket, key, stream, err => { + expect(err).to.exist + done() + }) + }) }) describe('deleteFile', function() { @@ -135,6 +144,14 @@ describe('FileHandler', function() { done() }) }) + + it('should throw an error when the key is in the wrong format', function(done) { + KeyBuilder.getConvertedFolderKey.returns('wombat') + FileHandler.deleteFile(bucket, key, err => { + expect(err).to.exist + done() + }) + }) }) describe('getFile', function() { diff --git a/services/filestore/test/unit/js/GcsPersistorTests.js b/services/filestore/test/unit/js/GcsPersistorTests.js index f02589c389..cd95bf1e20 100644 --- a/services/filestore/test/unit/js/GcsPersistorTests.js +++ b/services/filestore/test/unit/js/GcsPersistorTests.js @@ -556,22 +556,6 @@ describe('GcsPersistorTests', function() { expect(error.cause).to.equal(genericError) }) }) - - describe('when the directory name is in the wrong format', function() { - let error - - beforeEach(async function() { - try { - await GcsPersistor.promises.deleteDirectory(bucket, 'carbonara') - } catch (err) { - error = err - } - }) - - it('should throw a NotFoundError', function() { - expect(error).to.be.an.instanceOf(Errors.NotFoundError) - }) - }) }) describe('directorySize', function() { From 47e96a4d94a3e28e30fb40d035f9978576bc4c5c Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Sat, 14 Mar 2020 14:56:29 +0000 Subject: [PATCH 24/31] Add endpoint to delete a project Needs acceptance tests --- services/filestore/app.js | 5 +++++ services/filestore/app/js/FileController.js | 17 +++++++++++++++ services/filestore/app/js/FileHandler.js | 12 +++++++++++ services/filestore/app/js/KeyBuilder.js | 8 +++++++ .../test/unit/js/FileControllerTests.js | 19 +++++++++++++++++ .../test/unit/js/FileHandlerTests.js | 21 +++++++++++++++++++ 6 files changed, 82 insertions(+) diff --git a/services/filestore/app.js b/services/filestore/app.js index ea2c2ca1d8..2d5b27ee6b 100644 --- a/services/filestore/app.js +++ b/services/filestore/app.js @@ -61,6 +61,11 @@ app.delete( 
keyBuilder.userFileKeyMiddleware, fileController.deleteFile ) +app.delete( + '/project/:project_id', + keyBuilder.userProjectKeyMiddleware, + fileController.deleteProject +) app.head( '/template/:template_id/v/:version/:format', diff --git a/services/filestore/app/js/FileController.js b/services/filestore/app/js/FileController.js index 930434dc9d..9ddafb9f69 100644 --- a/services/filestore/app/js/FileController.js +++ b/services/filestore/app/js/FileController.js @@ -13,6 +13,7 @@ module.exports = { insertFile, copyFile, deleteFile, + deleteProject, directorySize } @@ -158,6 +159,22 @@ function deleteFile(req, res, next) { }) } +function deleteProject(req, res, next) { + metrics.inc('deleteProject') + const { project_id: projectId, bucket } = req + + req.requestLogger.setMessage('getting project size') + req.requestLogger.addFields({ projectId, bucket }) + + FileHandler.deleteProject(bucket, projectId, function(err) { + if (err) { + next(err) + } else { + res.sendStatus(204) + } + }) +} + function directorySize(req, res, next) { metrics.inc('projectSize') const { project_id: projectId, bucket } = req diff --git a/services/filestore/app/js/FileHandler.js b/services/filestore/app/js/FileHandler.js index 9b592df34e..40bdc95e34 100644 --- a/services/filestore/app/js/FileHandler.js +++ b/services/filestore/app/js/FileHandler.js @@ -10,6 +10,7 @@ const { ConversionError, WriteError } = require('./Errors') module.exports = { insertFile: callbackify(insertFile), deleteFile: callbackify(deleteFile), + deleteProject: callbackify(deleteProject), getFile: callbackify(getFile), getFileSize: callbackify(getFileSize), getDirectorySize: callbackify(getDirectorySize), @@ -17,6 +18,7 @@ module.exports = { getFile, insertFile, deleteFile, + deleteProject, getFileSize, getDirectorySize } @@ -48,6 +50,16 @@ async function deleteFile(bucket, key) { ]) } +async function deleteProject(bucket, key) { + if (!key.match(/^[0-9a-f]{24}\//i)) { + throw new WriteError({ + message: 'key does not match validation regex', + info: { bucket, key } + }) + } + await PersistorManager.promises.deleteDirectory(bucket, key) +} + async function getFile(bucket, key, opts) { opts = opts || {} if (!opts.format && !opts.style) { diff --git a/services/filestore/app/js/KeyBuilder.js b/services/filestore/app/js/KeyBuilder.js index 66cf563014..9968753349 100644 --- a/services/filestore/app/js/KeyBuilder.js +++ b/services/filestore/app/js/KeyBuilder.js @@ -4,6 +4,7 @@ module.exports = { getConvertedFolderKey, addCachingToKey, userFileKeyMiddleware, + userProjectKeyMiddleware, publicFileKeyMiddleware, publicProjectKeyMiddleware, bucketFileKeyMiddleware, @@ -37,6 +38,13 @@ function userFileKeyMiddleware(req, res, next) { next() } +function userProjectKeyMiddleware(req, res, next) { + const { project_id: projectId } = req.params + req.key = `${projectId}/` + req.bucket = settings.filestore.stores.user_files + next() +} + function publicFileKeyMiddleware(req, res, next) { if (settings.filestore.stores.public_files == null) { return res.status(501).send('public files not available') diff --git a/services/filestore/test/unit/js/FileControllerTests.js b/services/filestore/test/unit/js/FileControllerTests.js index 2d1411ea27..4a99a875a9 100644 --- a/services/filestore/test/unit/js/FileControllerTests.js +++ b/services/filestore/test/unit/js/FileControllerTests.js @@ -40,6 +40,7 @@ describe('FileController', function() { getFile: sinon.stub().yields(null, fileStream), getFileSize: sinon.stub().yields(null, fileSize), deleteFile: 
sinon.stub().yields(), + deleteProject: sinon.stub().yields(), insertFile: sinon.stub().yields(), getDirectorySize: sinon.stub().yields(null, fileSize) } @@ -67,6 +68,7 @@ describe('FileController', function() { req = { key: key, bucket: bucket, + project_id: projectId, query: {}, params: { project_id: projectId, @@ -257,6 +259,23 @@ describe('FileController', function() { }) }) + describe('delete project', function() { + it('should tell the file handler', function(done) { + res.sendStatus = code => { + code.should.equal(204) + expect(FileHandler.deleteProject).to.have.been.calledWith(bucket, projectId) + done() + } + FileController.deleteProject(req, res, next) + }) + + it('should send a 500 if there was an error', function() { + FileHandler.deleteProject.yields(error) + FileController.deleteProject(req, res, next) + expect(next).to.have.been.calledWith(error) + }) + }) + describe('directorySize', function() { it('should return total directory size bytes', function(done) { FileController.directorySize(req, { diff --git a/services/filestore/test/unit/js/FileHandlerTests.js b/services/filestore/test/unit/js/FileHandlerTests.js index 9692521531..acd3b8fc86 100644 --- a/services/filestore/test/unit/js/FileHandlerTests.js +++ b/services/filestore/test/unit/js/FileHandlerTests.js @@ -27,6 +27,7 @@ describe('FileHandler', function() { const bucket = 'my_bucket' const key = `${ObjectId()}/${ObjectId()}` const convertedFolderKey = `${ObjectId()}/${ObjectId()}` + const projectKey = `${ObjectId()}/` const sourceStream = 'sourceStream' const convertedKey = 'convertedKey' const readStream = { @@ -154,6 +155,26 @@ describe('FileHandler', function() { }) }) + describe('deleteProject', function() { + it('should tell the filestore manager to delete the folder', function(done) { + FileHandler.deleteProject(bucket, projectKey, err => { + expect(err).not.to.exist + expect(PersistorManager.promises.deleteDirectory).to.have.been.calledWith( + bucket, + projectKey + ) + done() + }) + }) + + it('should throw an error when the key is in the wrong format', function(done) { + FileHandler.deleteProject(bucket, 'wombat', err => { + expect(err).to.exist + done() + }) + }) + }) + describe('getFile', function() { it('should return the source stream no format or style are defined', function(done) { FileHandler.getFile(bucket, key, null, (err, stream) => { From ce52f8aa602f08b643a2cc8ab730c44254c533b4 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 11:33:51 +0000 Subject: [PATCH 25/31] Update FSPersistor deleteDirectory behaviour to match S3 and GCS --- services/filestore/app/js/FSPersistor.js | 6 ++--- services/filestore/package-lock.json | 5 ---- services/filestore/package.json | 1 - .../test/unit/js/FSPersistorTests.js | 27 +++++++++---------- 4 files changed, 15 insertions(+), 24 deletions(-) diff --git a/services/filestore/app/js/FSPersistor.js b/services/filestore/app/js/FSPersistor.js index 973c670efd..4e514e3350 100644 --- a/services/filestore/app/js/FSPersistor.js +++ b/services/filestore/app/js/FSPersistor.js @@ -1,7 +1,6 @@ const fs = require('fs') const glob = require('glob') const path = require('path') -const rimraf = require('rimraf') const Stream = require('stream') const { promisify, callbackify } = require('util') @@ -14,7 +13,6 @@ const fsUnlink = promisify(fs.unlink) const fsOpen = promisify(fs.open) const fsStat = promisify(fs.stat) const fsGlob = promisify(glob) -const rmrf = promisify(rimraf) const filterName = key => key.replace(/\//g, '_') @@ -146,7 +144,9 @@ async function 
deleteDirectory(location, name) { const filteredName = filterName(name.replace(/\/$/, '')) try { - await rmrf(`${location}/${filteredName}`) + await Promise.all( + (await fsGlob(`${location}/${filteredName}*`)).map(file => fsUnlink(file)) + ) } catch (err) { throw PersistorHelper.wrapError( err, diff --git a/services/filestore/package-lock.json b/services/filestore/package-lock.json index 7c21d9e128..90f5698668 100644 --- a/services/filestore/package-lock.json +++ b/services/filestore/package-lock.json @@ -5059,11 +5059,6 @@ } } }, - "rimraf": { - "version": "2.2.8", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.2.8.tgz", - "integrity": "sha512-R5KMKHnPAQaZMqLOsyuyUmcIjSeDm+73eoqQpaXA7AZ22BL+6C+1mcUscgOsNd8WVlJuvlgAPsegcx7pjlV0Dg==" - }, "run-async": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/run-async/-/run-async-2.4.0.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index 51530e86b4..c4b8f16b15 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -33,7 +33,6 @@ "range-parser": "^1.0.2", "request": "^2.88.0", "request-promise-native": "^1.0.8", - "rimraf": "2.2.8", "settings-sharelatex": "^1.1.0", "stream-buffers": "~0.2.5", "stream-meter": "^1.0.4", diff --git a/services/filestore/test/unit/js/FSPersistorTests.js b/services/filestore/test/unit/js/FSPersistorTests.js index 4dd5a2fa11..4777de502a 100644 --- a/services/filestore/test/unit/js/FSPersistorTests.js +++ b/services/filestore/test/unit/js/FSPersistorTests.js @@ -22,15 +22,7 @@ describe('FSPersistorTests', function() { const files = ['animals/wombat.tex', 'vegetables/potato.tex'] const globs = [`${location}/${files[0]}`, `${location}/${files[1]}`] const filteredFilenames = ['animals_wombat.tex', 'vegetables_potato.tex'] - let fs, - rimraf, - stream, - LocalFileWriter, - FSPersistor, - glob, - readStream, - crypto, - Hash + let fs, stream, LocalFileWriter, FSPersistor, glob, readStream, crypto, Hash beforeEach(function() { readStream = { @@ -46,7 +38,6 @@ describe('FSPersistorTests', function() { stat: sinon.stub().yields(null, stat) } glob = sinon.stub().yields(null, globs) - rimraf = sinon.stub().yields() stream = { pipeline: sinon.stub().yields() } LocalFileWriter = { promises: { @@ -68,7 +59,6 @@ describe('FSPersistorTests', function() { './Errors': Errors, fs, glob, - rimraf, stream, crypto, // imported by PersistorHelper but otherwise unused here @@ -271,15 +261,22 @@ describe('FSPersistorTests', function() { }) describe('deleteDirectory', function() { - it('Should call rmdir(rimraf) with correct options', async function() { + it('Should call glob with correct options', async function() { await FSPersistor.promises.deleteDirectory(location, files[0]) - expect(rimraf).to.have.been.calledWith( - `${location}/${filteredFilenames[0]}` + expect(glob).to.have.been.calledWith( + `${location}/${filteredFilenames[0]}*` ) }) + it('Should call unlink on the returned files', async function() { + await FSPersistor.promises.deleteDirectory(location, files[0]) + for (const filename of globs) { + expect(fs.unlink).to.have.been.calledWith(filename) + } + }) + it('Should propagate the error', async function() { - rimraf.yields(error) + glob.yields(error) await expect( FSPersistor.promises.deleteDirectory(location, files[0]) ).to.eventually.be.rejected.and.have.property('cause', error) From 9f74aac1a08fcb099ef3ebe98115d8f0710e0488 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 11:34:45 +0000 Subject: [PATCH 26/31] Add 
acceptance tests for directory deletion --- .../test/acceptance/js/FilestoreTests.js | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index 8369698891..dc31d1a83c 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -252,7 +252,7 @@ describe('Filestore', function() { }) describe('with multiple files', function() { - let fileIds, fileUrls + let fileIds, fileUrls, projectUrl const localFileReadPaths = [ '/tmp/filestore_acceptance_tests_file_read_1.txt', '/tmp/filestore_acceptance_tests_file_read_2.txt' @@ -278,10 +278,11 @@ describe('Filestore', function() { }) beforeEach(async function() { + projectUrl = `${filestoreUrl}/project/${projectId}` fileIds = [ObjectId().toString(), ObjectId().toString()] fileUrls = [ - `${filestoreUrl}/project/${projectId}/file/${fileIds[0]}`, - `${filestoreUrl}/project/${projectId}/file/${fileIds[1]}` + `${projectUrl}/file/${fileIds[0]}`, + `${projectUrl}/file/${fileIds[1]}` ] const writeStreams = [ @@ -311,6 +312,34 @@ describe('Filestore', function() { constantFileContents[0].length + constantFileContents[1].length ) }) + + it('should store the files', async function() { + for (const index in fileUrls) { + await expect(rp.get(fileUrls[index])).to.eventually.have.property( + 'body', + constantFileContents[index] + ) + } + }) + + it('should be able to delete the project', async function() { + await expect(rp.delete(projectUrl)).to.eventually.have.property( + 'statusCode', + 204 + ) + + for (const index in fileUrls) { + await expect( + rp.get(fileUrls[index]) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + } + }) + + it('should not delete a partial project id', async function() { + await expect( + rp.delete(`${filestoreUrl}/project/5`) + ).to.eventually.be.rejected.and.have.property('statusCode', 400) + }) }) describe('with a large file', function() { From 06c4c0f74f5abd2df18b0d156da71ee29dfae575 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 11:35:01 +0000 Subject: [PATCH 27/31] Fix incorrect key when deleting projects --- services/filestore/app/js/Errors.js | 4 +++- services/filestore/app/js/FileController.js | 11 +++++++---- services/filestore/app/js/FileHandler.js | 8 ++++---- .../filestore/test/unit/js/FileControllerTests.js | 2 +- services/filestore/test/unit/js/FileHandlerTests.js | 7 +++---- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/services/filestore/app/js/Errors.js b/services/filestore/app/js/Errors.js index 445b666e17..1beefb79c8 100644 --- a/services/filestore/app/js/Errors.js +++ b/services/filestore/app/js/Errors.js @@ -25,6 +25,7 @@ class ConversionsDisabledError extends BackwardCompatibleError {} class ConversionError extends BackwardCompatibleError {} class SettingsError extends BackwardCompatibleError {} class TimeoutError extends BackwardCompatibleError {} +class InvalidParametersError extends BackwardCompatibleError {} class FailedCommandError extends OError { constructor(command, code, stdout, stderr) { @@ -50,5 +51,6 @@ module.exports = { ConversionError, HealthCheckError, SettingsError, - TimeoutError + TimeoutError, + InvalidParametersError } diff --git a/services/filestore/app/js/FileController.js b/services/filestore/app/js/FileController.js index 9ddafb9f69..0e663f9421 100644 --- a/services/filestore/app/js/FileController.js +++ 
b/services/filestore/app/js/FileController.js @@ -161,13 +161,16 @@ function deleteFile(req, res, next) { function deleteProject(req, res, next) { metrics.inc('deleteProject') - const { project_id: projectId, bucket } = req + const { key, bucket } = req - req.requestLogger.setMessage('getting project size') - req.requestLogger.addFields({ projectId, bucket }) + req.requestLogger.setMessage('deleting project') + req.requestLogger.addFields({ key, bucket }) - FileHandler.deleteProject(bucket, projectId, function(err) { + FileHandler.deleteProject(bucket, key, function(err) { if (err) { + if (err instanceof Errors.InvalidParametersError) { + return res.sendStatus(400) + } next(err) } else { res.sendStatus(204) diff --git a/services/filestore/app/js/FileHandler.js b/services/filestore/app/js/FileHandler.js index 40bdc95e34..a6032350b1 100644 --- a/services/filestore/app/js/FileHandler.js +++ b/services/filestore/app/js/FileHandler.js @@ -5,7 +5,7 @@ const LocalFileWriter = require('./LocalFileWriter') const FileConverter = require('./FileConverter') const KeyBuilder = require('./KeyBuilder') const ImageOptimiser = require('./ImageOptimiser') -const { ConversionError, WriteError } = require('./Errors') +const { ConversionError, InvalidParametersError } = require('./Errors') module.exports = { insertFile: callbackify(insertFile), @@ -27,7 +27,7 @@ module.exports = { async function insertFile(bucket, key, stream) { const convertedKey = KeyBuilder.getConvertedFolderKey(key) if (!convertedKey.match(/^[0-9a-f]{24}\/[0-9a-f]{24}/i)) { - throw new WriteError({ + throw new InvalidParametersError({ message: 'key does not match validation regex', info: { bucket, key, convertedKey } }) @@ -39,7 +39,7 @@ async function insertFile(bucket, key, stream) { async function deleteFile(bucket, key) { const convertedKey = KeyBuilder.getConvertedFolderKey(key) if (!convertedKey.match(/^[0-9a-f]{24}\/[0-9a-f]{24}/i)) { - throw new WriteError({ + throw new InvalidParametersError({ message: 'key does not match validation regex', info: { bucket, key, convertedKey } }) @@ -52,7 +52,7 @@ async function deleteFile(bucket, key) { async function deleteProject(bucket, key) { if (!key.match(/^[0-9a-f]{24}\//i)) { - throw new WriteError({ + throw new InvalidParametersError({ message: 'key does not match validation regex', info: { bucket, key } }) diff --git a/services/filestore/test/unit/js/FileControllerTests.js b/services/filestore/test/unit/js/FileControllerTests.js index 4a99a875a9..16fbb3641c 100644 --- a/services/filestore/test/unit/js/FileControllerTests.js +++ b/services/filestore/test/unit/js/FileControllerTests.js @@ -263,7 +263,7 @@ describe('FileController', function() { it('should tell the file handler', function(done) { res.sendStatus = code => { code.should.equal(204) - expect(FileHandler.deleteProject).to.have.been.calledWith(bucket, projectId) + expect(FileHandler.deleteProject).to.have.been.calledWith(bucket, key) done() } FileController.deleteProject(req, res, next) diff --git a/services/filestore/test/unit/js/FileHandlerTests.js b/services/filestore/test/unit/js/FileHandlerTests.js index acd3b8fc86..7823c9454f 100644 --- a/services/filestore/test/unit/js/FileHandlerTests.js +++ b/services/filestore/test/unit/js/FileHandlerTests.js @@ -159,10 +159,9 @@ describe('FileHandler', function() { it('should tell the filestore manager to delete the folder', function(done) { FileHandler.deleteProject(bucket, projectKey, err => { expect(err).not.to.exist - 
expect(PersistorManager.promises.deleteDirectory).to.have.been.calledWith( - bucket, - projectKey - ) + expect( + PersistorManager.promises.deleteDirectory + ).to.have.been.calledWith(bucket, projectKey) done() }) }) From 9b658dda18cb2431e3b77f3f1011d0310294bf20 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 15:53:45 +0000 Subject: [PATCH 28/31] Copy-on-delete before unlocking --- services/filestore/app/js/GcsPersistor.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index bc46153983..ddaf37d42d 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -179,9 +179,6 @@ async function deleteFile(bucketName, key) { try { const file = storage.bucket(bucketName).file(key) - if (settings.filestore.gcs.unlockBeforeDelete) { - await file.setMetadata({ eventBasedHold: false }) - } if (settings.filestore.gcs.deletedBucketSuffix) { await file.copy( storage @@ -189,6 +186,9 @@ async function deleteFile(bucketName, key) { .file(`${key}-${new Date()}`) ) } + if (settings.filestore.gcs.unlockBeforeDelete) { + await file.setMetadata({ eventBasedHold: false }) + } await file.delete() } catch (err) { const error = PersistorHelper.wrapError( From b37c52fc3ab5369638654046a7f91028b94105ab Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 15:54:05 +0000 Subject: [PATCH 29/31] Make GCS delete concurrency configurable --- services/filestore/app/js/GcsPersistor.js | 10 +++++++--- services/filestore/config/settings.defaults.coffee | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index ddaf37d42d..a78bfce2cd 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -209,9 +209,13 @@ async function deleteDirectory(bucketName, key) { .bucket(bucketName) .getFiles({ directory: key }) - await asyncPool(50, files, async file => { - await deleteFile(bucketName, file.name) - }) + await asyncPool( + settings.filestore.gcs.deleteConcurrency, + files, + async file => { + await deleteFile(bucketName, file.name) + } + ) } catch (err) { const error = PersistorHelper.wrapError( err, diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 6b5238e552..6867945d10 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -42,6 +42,7 @@ settings = projectId: process.env['GCS_PROJECT_ID'] unlockBeforeDelete: process.env['GCS_UNLOCK_BEFORE_DELETE'] == "true" # unlock an event-based hold before deleting. default false deletedBucketSuffix: process.env['GCS_DELETED_BUCKET_SUFFIX'] # if present, copy file to another bucket on delete. default null + deleteConcurrency: parseInt(process.env['GCS_DELETE_CONCURRENCY']) || 50 s3: if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? 
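A note on the delete-concurrency setting above: tiny-async-pool's asyncPool(limit, items, iteratorFn) keeps at most `limit` iterator calls pending at once, starting the next only as earlier ones settle, and its returned promise resolves once every call has finished. Below is a minimal, self-contained sketch of the pattern patches 21 and 29 rely on; the fakeDelete helper, the 50ms delay, and the 200-file list are illustrative stand-ins rather than part of the patches (the real code calls deleteFile for each object returned by getFiles).

const asyncPool = require('tiny-async-pool')

// stand-in for storage.bucket(bucketName).file(name).delete()
async function fakeDelete(name) {
  await new Promise(resolve => setTimeout(resolve, 50))
  console.log('deleted', name)
}

async function main() {
  const files = Array.from({ length: 200 }, (_, i) => `project/file-${i}`)
  // mirrors the GCS_DELETE_CONCURRENCY handling in settings.defaults.coffee
  const limit = parseInt(process.env.GCS_DELETE_CONCURRENCY) || 50
  // at most `limit` deletes are in flight at any moment
  await asyncPool(limit, files, fakeDelete)
}

main().catch(err => {
  console.error(err)
  process.exit(1)
})

With 200 files and a limit of 50, the deletes proceed in roughly four waves instead of firing 200 simultaneous requests at the bucket.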
From 9d32d4ec16b97007743126c432d00c70c62646db Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 15:57:37 +0000 Subject: [PATCH 30/31] Don't modify 'opts' parameter --- services/filestore/app/js/GcsPersistor.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index a78bfce2cd..5a44132882 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -113,7 +113,8 @@ async function sendStream(bucketName, key, readStream, sourceMd5) { } } -async function getFileStream(bucketName, key, opts = {}) { +async function getFileStream(bucketName, key, _opts = {}) { + const opts = Object.assign({}, _opts) if (opts.end) { // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1 opts.end++ From cb4bdd99f4b2094940a32e634e0a521810156f03 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 16 Mar 2020 16:09:56 +0000 Subject: [PATCH 31/31] Use an ISODate for deleted file names --- services/filestore/app/js/GcsPersistor.js | 2 +- services/filestore/test/acceptance/js/FilestoreTests.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/filestore/app/js/GcsPersistor.js b/services/filestore/app/js/GcsPersistor.js index 5a44132882..799ee65905 100644 --- a/services/filestore/app/js/GcsPersistor.js +++ b/services/filestore/app/js/GcsPersistor.js @@ -184,7 +184,7 @@ async function deleteFile(bucketName, key) { await file.copy( storage .bucket(`${bucketName}${settings.filestore.gcs.deletedBucketSuffix}`) - .file(`${key}-${new Date()}`) + .file(`${key}-${new Date().toISOString()}`) ) } if (settings.filestore.gcs.unlockBeforeDelete) { diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index dc31d1a83c..272ffb52bd 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -453,7 +453,7 @@ describe('Filestore', function() { await TestHelper.expectPersistorToHaveFile( app.persistor, `${Settings.filestore.stores.user_files}-deleted`, - `${projectId}/${fileId}-${date}`, + `${projectId}/${fileId}-${date.toISOString()}`, content ) })
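Taken together, patches 20, 28, and 31 leave the GCS deleteFile flow as: copy the object into the deleted-files bucket under a timestamped name, release any event-based hold, then delete the original. Ordering the copy before the unlock (patch 28) means the hold still protects the source object if the copy fails, and the ISO timestamp (patch 31) stops repeated deletes of the same key from overwriting each other in the deleted bucket. A consolidated sketch of that flow, assuming the settings shape introduced in patch 20 and omitting the PersistorHelper error wrapping the real module applies:

const { Storage } = require('@google-cloud/storage')

const storage = new Storage()

async function softDelete(bucketName, key, gcsSettings) {
  const file = storage.bucket(bucketName).file(key)
  if (gcsSettings.deletedBucketSuffix) {
    // copy first: if this fails, any event-based hold still protects the source
    await file.copy(
      storage
        .bucket(`${bucketName}${gcsSettings.deletedBucketSuffix}`)
        .file(`${key}-${new Date().toISOString()}`)
    )
  }
  if (gcsSettings.unlockBeforeDelete) {
    // an event-based hold blocks deletion until it is explicitly released
    await file.setMetadata({ eventBasedHold: false })
  }
  await file.delete()
}

// e.g. softDelete('fake_user_files', `${projectId}/${fileId}`,
//   { deletedBucketSuffix: '-deleted', unlockBeforeDelete: true })

Patch 30's Object.assign({}, _opts) follows the same defensive style: getFileStream increments the inclusive 'end' offset for GCS range reads, and copying the options object first keeps that adjustment from leaking back into the caller's opts.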