mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #20302 from overleaf/jpa-object-persistor-metrics
[object-persistor] add more metrics to getObjectStream and sendStream GitOrigin-RevId: 9fe6b9d205de6ad27838f91d92d2b3a3d6c2f129
This commit is contained in:
parent
8fa676082e
commit
baaac44172
4 changed files with 87 additions and 27 deletions
|
@ -3,9 +3,9 @@ const fs = require('fs')
|
||||||
const fsPromises = require('fs/promises')
|
const fsPromises = require('fs/promises')
|
||||||
const globCallbacks = require('glob')
|
const globCallbacks = require('glob')
|
||||||
const Path = require('path')
|
const Path = require('path')
|
||||||
|
const { PassThrough } = require('stream')
|
||||||
const { pipeline } = require('stream/promises')
|
const { pipeline } = require('stream/promises')
|
||||||
const { promisify } = require('util')
|
const { promisify } = require('util')
|
||||||
const Metrics = require('@overleaf/metrics')
|
|
||||||
|
|
||||||
const AbstractPersistor = require('./AbstractPersistor')
|
const AbstractPersistor = require('./AbstractPersistor')
|
||||||
const { ReadError, WriteError } = require('./Errors')
|
const { ReadError, WriteError } = require('./Errors')
|
||||||
|
@ -63,6 +63,10 @@ module.exports = class FSPersistor extends AbstractPersistor {
|
||||||
|
|
||||||
// opts may be {start: Number, end: Number}
|
// opts may be {start: Number, end: Number}
|
||||||
async getObjectStream(location, name, opts = {}) {
|
async getObjectStream(location, name, opts = {}) {
|
||||||
|
const observer = new PersistorHelper.ObserverStream({
|
||||||
|
metric: 'fs.ingress', // ingress to us from disk
|
||||||
|
bucket: location,
|
||||||
|
})
|
||||||
const fsPath = this._getFsPath(location, name)
|
const fsPath = this._getFsPath(location, name)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -76,7 +80,11 @@ module.exports = class FSPersistor extends AbstractPersistor {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return fs.createReadStream(null, opts)
|
const stream = fs.createReadStream(null, opts)
|
||||||
|
// Return a PassThrough stream with a minimal interface. It will buffer until the caller starts reading. It will emit errors from the source stream (Stream.pipeline passes errors along).
|
||||||
|
const pass = new PassThrough()
|
||||||
|
pipeline(stream, observer, pass).catch(() => {})
|
||||||
|
return pass
|
||||||
}
|
}
|
||||||
|
|
||||||
async getRedirectUrl() {
|
async getRedirectUrl() {
|
||||||
|
@ -221,22 +229,25 @@ module.exports = class FSPersistor extends AbstractPersistor {
|
||||||
}
|
}
|
||||||
|
|
||||||
async _writeStreamToTempFile(location, stream, opts = {}) {
|
async _writeStreamToTempFile(location, stream, opts = {}) {
|
||||||
|
const observerOptions = {
|
||||||
|
metric: 'fs.egress', // egress from us to disk
|
||||||
|
bucket: location,
|
||||||
|
}
|
||||||
|
const observer = new PersistorHelper.ObserverStream(observerOptions)
|
||||||
|
|
||||||
const tempDirPath = await fsPromises.mkdtemp(Path.join(location, 'tmp-'))
|
const tempDirPath = await fsPromises.mkdtemp(Path.join(location, 'tmp-'))
|
||||||
const tempFilePath = Path.join(tempDirPath, 'uploaded-file')
|
const tempFilePath = Path.join(tempDirPath, 'uploaded-file')
|
||||||
|
|
||||||
const transforms = []
|
const transforms = [observer]
|
||||||
let md5Observer
|
let md5Observer
|
||||||
if (opts.sourceMd5) {
|
if (opts.sourceMd5) {
|
||||||
md5Observer = createMd5Observer()
|
md5Observer = createMd5Observer()
|
||||||
transforms.push(md5Observer.transform)
|
transforms.push(md5Observer.transform)
|
||||||
}
|
}
|
||||||
|
|
||||||
const timer = new Metrics.Timer('writingFile')
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const writeStream = fs.createWriteStream(tempFilePath)
|
const writeStream = fs.createWriteStream(tempFilePath)
|
||||||
await pipeline(stream, ...transforms, writeStream)
|
await pipeline(stream, ...transforms, writeStream)
|
||||||
timer.done()
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
await this._cleanupTempFile(tempFilePath)
|
await this._cleanupTempFile(tempFilePath)
|
||||||
throw new WriteError(
|
throw new WriteError(
|
||||||
|
|
|
@ -47,9 +47,9 @@ module.exports = class GcsPersistor extends AbstractPersistor {
|
||||||
|
|
||||||
async sendStream(bucketName, key, readStream, opts = {}) {
|
async sendStream(bucketName, key, readStream, opts = {}) {
|
||||||
try {
|
try {
|
||||||
// egress from us to gcs
|
|
||||||
const observeOptions = {
|
const observeOptions = {
|
||||||
metric: 'gcs.egress',
|
metric: 'gcs.egress', // egress from us to GCS
|
||||||
|
bucket: bucketName,
|
||||||
}
|
}
|
||||||
|
|
||||||
let sourceMd5 = opts.sourceMd5
|
let sourceMd5 = opts.sourceMd5
|
||||||
|
@ -104,6 +104,10 @@ module.exports = class GcsPersistor extends AbstractPersistor {
|
||||||
}
|
}
|
||||||
|
|
||||||
async getObjectStream(bucketName, key, opts = {}) {
|
async getObjectStream(bucketName, key, opts = {}) {
|
||||||
|
const observer = new PersistorHelper.ObserverStream({
|
||||||
|
metric: 'gcs.ingress', // ingress to us from GCS
|
||||||
|
bucket: bucketName,
|
||||||
|
})
|
||||||
const stream = this.storage
|
const stream = this.storage
|
||||||
.bucket(bucketName)
|
.bucket(bucketName)
|
||||||
.file(key)
|
.file(key)
|
||||||
|
@ -133,12 +137,7 @@ module.exports = class GcsPersistor extends AbstractPersistor {
|
||||||
ReadError
|
ReadError
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
// Return a PassThrough stream with a minimal interface. It will buffer until the caller starts reading. It will emit errors from the source stream (Stream.pipeline passes errors along).
|
||||||
// ingress to us from gcs
|
|
||||||
const observer = new PersistorHelper.ObserverStream({
|
|
||||||
metric: 'gcs.ingress',
|
|
||||||
})
|
|
||||||
|
|
||||||
const pass = new PassThrough()
|
const pass = new PassThrough()
|
||||||
pipeline(stream, observer, pass).catch(() => {})
|
pipeline(stream, observer, pass).catch(() => {})
|
||||||
return pass
|
return pass
|
||||||
|
|
|
@ -5,31 +5,82 @@ const Logger = require('@overleaf/logger')
|
||||||
const Metrics = require('@overleaf/metrics')
|
const Metrics = require('@overleaf/metrics')
|
||||||
const { WriteError, NotFoundError } = require('./Errors')
|
const { WriteError, NotFoundError } = require('./Errors')
|
||||||
|
|
||||||
|
const _128KiB = 128 * 1024
|
||||||
|
const TIMING_BUCKETS = [
|
||||||
|
0, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000,
|
||||||
|
]
|
||||||
|
const SIZE_BUCKETS = [
|
||||||
|
0,
|
||||||
|
1_000,
|
||||||
|
10_000,
|
||||||
|
100_000,
|
||||||
|
_128KiB,
|
||||||
|
1_000_000,
|
||||||
|
10_000_000,
|
||||||
|
50_000_000,
|
||||||
|
100_000_000,
|
||||||
|
]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Observes data that passes through and optionally computes hash for content.
|
* Observes data that passes through and optionally computes hash for content.
|
||||||
*/
|
*/
|
||||||
class ObserverStream extends Stream.Transform {
|
class ObserverStream extends Stream.Transform {
|
||||||
/**
|
/**
|
||||||
* @param {string} metric prefix for metrics
|
* @param {string} metric prefix for metrics
|
||||||
|
* @param {string} bucket name of source/target bucket
|
||||||
* @param {string} hash optional hash algorithm, e.g. 'md5'
|
* @param {string} hash optional hash algorithm, e.g. 'md5'
|
||||||
*/
|
*/
|
||||||
constructor({ metric, hash = '' }) {
|
constructor({ metric, bucket, hash = '' }) {
|
||||||
super({ autoDestroy: true })
|
super({ autoDestroy: true })
|
||||||
|
|
||||||
this.bytes = 0
|
this.bytes = 0
|
||||||
|
this.start = performance.now()
|
||||||
|
|
||||||
if (hash) {
|
if (hash) {
|
||||||
this.hash = Crypto.createHash(hash)
|
this.hash = Crypto.createHash(hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
const onEnd = () => {
|
const onEnd = status => {
|
||||||
Metrics.count(metric, this.bytes)
|
const size = this.bytes < _128KiB ? 'lt-128KiB' : 'gte-128KiB'
|
||||||
|
const labels = { size, bucket, status }
|
||||||
|
// Keep this counter metric to allow rendering long-term charts.
|
||||||
|
Metrics.count(metric, this.bytes, 1, labels)
|
||||||
|
Metrics.inc(`${metric}.hit`, 1, labels)
|
||||||
|
|
||||||
|
if (status === 'error') return
|
||||||
|
// The below metrics are only relevant for successfully fetched objects.
|
||||||
|
|
||||||
|
Metrics.histogram(`${metric}.size`, this.bytes, SIZE_BUCKETS, {
|
||||||
|
status,
|
||||||
|
bucket,
|
||||||
|
})
|
||||||
|
if (this.firstByteAfterMs) {
|
||||||
|
Metrics.histogram(
|
||||||
|
`${metric}.latency.first-byte`,
|
||||||
|
this.firstByteAfterMs,
|
||||||
|
TIMING_BUCKETS,
|
||||||
|
labels
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Metrics.histogram(
|
||||||
|
`${metric}.latency`,
|
||||||
|
this.#getMsSinceStart(),
|
||||||
|
TIMING_BUCKETS,
|
||||||
|
labels
|
||||||
|
)
|
||||||
}
|
}
|
||||||
this.once('error', onEnd)
|
this.once('error', () => onEnd('error'))
|
||||||
this.once('end', onEnd)
|
this.once('end', () => onEnd('success'))
|
||||||
|
}
|
||||||
|
|
||||||
|
#getMsSinceStart() {
|
||||||
|
return performance.now() - this.start
|
||||||
}
|
}
|
||||||
|
|
||||||
_transform(chunk, encoding, done) {
|
_transform(chunk, encoding, done) {
|
||||||
|
if (this.bytes === 0) {
|
||||||
|
this.firstByteAfterMs = this.#getMsSinceStart()
|
||||||
|
}
|
||||||
if (this.hash) {
|
if (this.hash) {
|
||||||
this.hash.update(chunk)
|
this.hash.update(chunk)
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,9 +30,9 @@ module.exports = class S3Persistor extends AbstractPersistor {
|
||||||
|
|
||||||
async sendStream(bucketName, key, readStream, opts = {}) {
|
async sendStream(bucketName, key, readStream, opts = {}) {
|
||||||
try {
|
try {
|
||||||
// egress from us to S3
|
|
||||||
const observeOptions = {
|
const observeOptions = {
|
||||||
metric: 's3.egress',
|
metric: 's3.egress', // egress from us to S3
|
||||||
|
bucket: bucketName,
|
||||||
}
|
}
|
||||||
|
|
||||||
const observer = new PersistorHelper.ObserverStream(observeOptions)
|
const observer = new PersistorHelper.ObserverStream(observeOptions)
|
||||||
|
@ -85,6 +85,10 @@ module.exports = class S3Persistor extends AbstractPersistor {
|
||||||
if (opts.start != null && opts.end != null) {
|
if (opts.start != null && opts.end != null) {
|
||||||
params.Range = `bytes=${opts.start}-${opts.end}`
|
params.Range = `bytes=${opts.start}-${opts.end}`
|
||||||
}
|
}
|
||||||
|
const observer = new PersistorHelper.ObserverStream({
|
||||||
|
metric: 's3.ingress', // ingress from S3 to us
|
||||||
|
bucket: bucketName,
|
||||||
|
})
|
||||||
|
|
||||||
const req = this._getClientForBucket(bucketName).getObject(params)
|
const req = this._getClientForBucket(bucketName).getObject(params)
|
||||||
const stream = req.createReadStream()
|
const stream = req.createReadStream()
|
||||||
|
@ -116,12 +120,7 @@ module.exports = class S3Persistor extends AbstractPersistor {
|
||||||
ReadError
|
ReadError
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
// Return a PassThrough stream with a minimal interface. It will buffer until the caller starts reading. It will emit errors from the source stream (Stream.pipeline passes errors along).
|
||||||
// ingress from S3 to us
|
|
||||||
const observer = new PersistorHelper.ObserverStream({
|
|
||||||
metric: 's3.ingress',
|
|
||||||
})
|
|
||||||
|
|
||||||
const pass = new PassThrough()
|
const pass = new PassThrough()
|
||||||
pipeline(stream, observer, pass, err => {
|
pipeline(stream, observer, pass, err => {
|
||||||
if (err) req.abort()
|
if (err) req.abort()
|
||||||
|
|
Loading…
Reference in a new issue