Merge pull request #5670 from overleaf/jpa-cdn-pre-compress

[web] cdn_upload: compress assets prior to uploading them to GCS

GitOrigin-RevId: a9b0970beb124d20bd2ffe21d30a674ffafd6258
This commit is contained in:
Jakob Ackermann 2021-11-03 12:04:29 +01:00 committed by Copybot
parent 115436925c
commit cc78541714
4 changed files with 83 additions and 32 deletions

View file

@ -1,47 +1,22 @@
#!/bin/bash
set -e

# Sync /tmp/public into the given GCS bucket, optionally forcing a
# Content-Type header on every uploaded object.
#   $1 - MIME type to set, or '-' to let gsutil guess per file
#   $2 - destination bucket (gs://...)
#   $@ - extra gsutil rsync flags (e.g. -x exclude patterns)
function upload_with_content_type() {
  content_type=$1
  bucket=$2
  shift 2
  # Build the optional header as an array so the space between '-h' and
  # its value survives without relying on unquoted word-splitting.
  content_type_options=()
  if [[ "$content_type" != "-" ]]; then
    content_type_options=(-h "Content-Type:${content_type};charset=utf-8")
  fi
  # DOCS for gsutil -- it does not have long command line flags!
  ## global flags
  # -h NAME:VALUE   add header, can occur multiple times
  # -m              upload with multiple threads
  ## rsync flags
  # -r              traverse into directories recursively
  # -x              Python regex for excluding files from the sync
  gsutil \
    -h "Cache-Control:public, max-age=31536000" \
    "${content_type_options[@]}" \
    -m \
    rsync \
    -r \
    "$@" \
    "/tmp/public/" \
    "${bucket}/public/"
}
# Fan out one upload batch per content type.  Each batch runs in the
# background so the three gsutil rsyncs proceed in parallel; we block on
# all of them before returning.
# NOTE(review): the diff rendering interleaved the removed
# upload_with_content_type calls with the added bin/cdn_upload_batch
# calls; this resolves to the post-change version so each batch is
# uploaded exactly once.
#   $1 - destination bucket (gs://...)
function upload_into_bucket() {
  bucket=$1

  # stylesheets
  bin/cdn_upload_batch 'text/css' "$bucket" '.css' \
    -x '.+(?<!\.css)$' &

  # javascript files
  bin/cdn_upload_batch 'application/javascript' "$bucket" '.js' \
    -x '.+(?<!\.js)$' &

  # the rest
  bin/cdn_upload_batch '-' "$bucket" '-' \
    -x '.+\.(css|js)$' &

  wait
}
# Upload to staging CDN if branch is either 'master' or 'staging-master' or main variants
@ -51,6 +26,8 @@ if [[ "$BRANCH_NAME" == "master" || "$BRANCH_NAME" == "staging-master" || "$BRAN
# Source maps are not served from the CDN; drop them before uploading.
find /tmp/public -name '*.js.map' -delete
# Pre-compress assets (into /tmp/compressed) for the gzip upload pass.
bin/compress_assets
# Quote the bucket so an empty or unusual value cannot word-split.
upload_into_bucket "$CDN_STAG"
# Only upload to production CDN if branch is
if [[ "$BRANCH_NAME" == "master" || "$BRANCH_NAME" == "main" ]]; then

View file

@ -0,0 +1,44 @@
#!/bin/bash
set -e

# Upload one batch of assets (plain + pre-compressed variants) to GCS.
#   $1 - Content-Type to force, or '-' to let gsutil guess per file
#   $2 - destination bucket (gs://...)
#   $3 - file extension of this batch (e.g. '.css'), or '-' for "the rest"
#   $@ - extra gsutil rsync flags (e.g. -x exclude patterns)
content_type=$1
bucket=$2
text_extension=$3
shift 3

# Build the optional header as an array so the space between '-h' and
# its value survives without relying on unquoted word-splitting.
content_type_options=()
if [[ "$content_type" != "-" ]]; then
  content_type_options=(-h "Content-Type:${content_type};charset=utf-8")
fi

# DOCS for gsutil -- it does not have long command line flags!
## global flags
# -h NAME:VALUE   add header, can occur multiple times
# -m              upload with multiple threads
## rsync flags
# -c              use checksums for determining changed files (mtime is not stable)
# -r              traverse into directories recursively
# -x              Python regex for excluding files from the sync

if [[ "$text_extension" == "-" || $(find /tmp/public -type f -name "*$text_extension" | wc -l) != "0" ]]; then
  # Potentially skip upload of non-compressed .js/.css files: when every
  # file of this extension compressed well, /tmp/public holds none of them.
  gsutil \
    -h "Cache-Control:public, max-age=31536000" \
    "${content_type_options[@]}" \
    -m \
    rsync \
    -c \
    -r \
    "$@" \
    "/tmp/public/" \
    "${bucket}/public/"
fi

# Compressed variants are uploaded with Content-Encoding:gzip so clients
# receive them with transparent decompression.
gsutil \
  -h "Cache-Control:public, max-age=31536000" \
  -h "Content-Encoding:gzip" \
  "${content_type_options[@]}" \
  -m \
  rsync \
  -c \
  -r \
  "$@" \
  "/tmp/compressed/public/" \
  "${bucket}/public/"

View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -e -o pipefail

# Mirror the /tmp/public directory tree into /tmp/compressed and gzip
# every asset (see compress_batch_of_assets for the per-file logic).
SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")

pushd /tmp

# Recreate the directory layout under compressed/.  Use NUL delimiters
# end-to-end (find -print0, GNU sed -z, xargs --null) so paths containing
# whitespace or newlines cannot break the pipeline.
find public/ -type d -print0 \
| sed -z 's!^!compressed/!' \
| xargs --null mkdir --parents

# Compress files in parallel: batches of 20 files, up to 10 workers.
find public/ -type f -print0 \
| xargs --null --max-args 20 --max-procs 10 "$SCRIPT_DIR/compress_batch_of_assets"

popd

View file

@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -e

# Gzip each given file into the parallel compressed/ tree, keeping only
# the variant worth serving:
#   - compression helped     -> keep compressed/$file, drop the original
#   - compression gained nil -> keep the original, drop compressed/$file
# Callers must have created the compressed/ directory layout already
# (see compress_assets).  Paths are expected relative, e.g. public/x.js.
for file in "$@"; do
  file_gzipped="compressed/$file"
  # --no-name keeps output deterministic (no mtime/filename in header),
  # so rsync -c checksums stay stable across rebuilds.
  gzip -9 --no-name --stdout "$file" > "$file_gzipped"
  # '--' guards stat/rm against filenames that begin with a dash.
  before=$(stat -c%s -- "$file")
  after=$(stat -c%s -- "$file_gzipped")
  if [[ "$after" -ge "$before" ]]; then
    # Compression did not shrink the file; serve it uncompressed.
    rm -- "$file_gzipped"
  else
    rm -- "$file"
  fi
done