mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #9583 from overleaf/msm-script-project-size
[web] Script to count project sizes GitOrigin-RevId: a8ff5e083147fe1ad288f1eff023984ae19e3a86
This commit is contained in:
parent
9c5bf713b6
commit
7ce229be0c
1 changed files with 139 additions and 0 deletions
139
services/web/scripts/count_project_size.js
Normal file
139
services/web/scripts/count_project_size.js
Normal file
|
@ -0,0 +1,139 @@
|
|||
const readline = require('readline')
|
||||
const { waitForDb, ObjectId, db } = require('../app/src/infrastructure/mongodb')
|
||||
const ProjectEntityHandler = require('../app/src/Features/Project/ProjectEntityHandler')
|
||||
const ProjectGetter = require('../app/src/Features/Project/ProjectGetter')
|
||||
const Errors = require('../app/src/Features/Errors/Errors')
|
||||
const FileStoreHandler = require('../app/src/Features/FileStore/FileStoreHandler')
|
||||
|
||||
/* eslint-disable no-console */
|
||||
|
||||
// Handles a list of project IDs from stdin, one per line, and outputs the count of files and docs
|
||||
// in the project, along with the aggregated size in bytes for all files and docs.
|
||||
//
|
||||
// It writes its results to stderr, so that the logging junk on stdout can be redirected elsewhere - e.g., running like:
// node scripts/count_project_size.js < /tmp/project_ids.txt > /dev/null 2> /tmp/output.txt
|
||||
//
|
||||
// The output format is line-per-project with data separated by a single space, containing:
|
||||
// - projectId
|
||||
// - file count
|
||||
// - deleted files count
|
||||
// - doc count
|
||||
// - deleted docs count
|
||||
// - total size in bytes of (non deleted) files
|
||||
// - total size in bytes of (non deleted) docs
|
||||
|
||||
/**
 * Reads project IDs from stdin (one per line) and reports, for each project,
 * its file/doc counts and aggregated sizes.
 *
 * Every per-project line is written to stderr, in one of three shapes:
 *   <projectId> <files> <deletedFiles> <docs> <deletedDocs> <fileBytes> <docBytes>
 *   <projectId> NOTFOUND
 *   <projectId> ERROR <error name> <error message>
 *
 * A failure while processing one project does not abort the run: it is
 * reported on that project's line and the loop moves on to the next ID.
 *
 * @returns {Promise<void>} resolves once stdin has been fully consumed
 */
async function countProjectFiles() {
  const rl = readline.createInterface({
    input: process.stdin,
  })

  for await (const line of rl) {
    // Tolerate blank lines and stray whitespace in the ID list instead of
    // sending them to the project lookup as if they were IDs.
    const projectId = line.trim()
    if (projectId === '') {
      continue
    }

    try {
      const project = await ProjectGetter.promises.getProject(projectId)
      if (!project) {
        throw new Errors.NotFoundError('project not found')
      }

      const { files, docs } =
        ProjectEntityHandler.getAllEntitiesFromProject(project)

      // File sizes and doc sizes come from different backends, so the two
      // totals are computed concurrently.
      const [fileSize, docSize] = await Promise.all([
        countFilesSize(files, projectId),
        countDocsSizes(docs),
      ])

      console.error(
        projectId,
        files.length,
        project.deletedFiles?.length ?? 0,
        docs.length,
        project.deletedDocs?.length ?? 0,
        fileSize,
        docSize
      )
    } catch (err) {
      if (err instanceof Errors.NotFoundError) {
        console.error(projectId, 'NOTFOUND')
      } else {
        // Reported on stderr like every other per-project line, so that a
        // failed project still shows up in the collected output rather than
        // being lost among the stdout logging.
        console.error(projectId, 'ERROR', err.name, err.message)
      }
    }
  }
}
|
||||
|
||||
/**
 * Sums the size of every file of a project, as reported by
 * FileStoreHandler.promises.getFileSize.
 *
 * Sizes are requested one file at a time, in the order of `files`.
 *
 * @param {Array<{file: {_id: *}}>} [files] - entries exposing `file._id`
 * @param {string} projectId - project the files belong to
 * @returns {Promise<number>} the summed size; 0 when `files` is missing or empty
 * @throws {Error} when a reported size cannot be parsed as an integer
 */
async function countFilesSize(files, projectId) {
  // Covers both "no list at all" and "empty list".
  if (!files?.length) {
    return 0
  }

  const ids = files.map(fileObject => fileObject.file._id)

  let totalFileSize = 0

  for (const fileId of ids) {
    const contentLength = await FileStoreHandler.promises.getFileSize(
      projectId,
      fileId
    )
    // The size may come back as a string, so normalise it to an integer
    // before adding it up.
    const size = Number.parseInt(contentLength, 10)

    if (Number.isNaN(size)) {
      throw new Error(
        `Unable to fetch file size for fileId=${fileId} and projectId=${projectId}`
      )
    }

    totalFileSize += size
  }

  return totalFileSize
}
|
||||
|
||||
/**
 * Sums the size of every doc in the list by asking MongoDB to add up the
 * byte length ($strLenBytes) of each entry of the doc's `lines` array.
 * Nothing is added for separators between lines, and a doc without `lines`
 * counts as 0.
 *
 * One aggregation is run per doc, sequentially.
 *
 * @param {Array<{doc: {_id: *}}>} [docs] - entries exposing `doc._id`
 * @returns {Promise<number>} the summed size; 0 when `docs` is missing or empty
 * @throws {Error} when a doc is not found in `db.docs` or its computed size
 *   is not a finite number
 */
async function countDocsSizes(docs) {
  // Covers both "no list at all" and "empty list".
  if (!docs?.length) {
    return 0
  }

  const ids = docs.map(docObject => docObject.doc._id)

  let totalDocSize = 0

  for (const docId of ids) {
    const cursor = await db.docs.aggregate([
      {
        // `new` keeps this working with class-based ObjectId implementations.
        $match: { _id: new ObjectId(docId) },
      },
      {
        $project: {
          lineSizeInBytes: {
            $reduce: {
              input: { $ifNull: ['$lines', []] },
              initialValue: 0,
              in: {
                $add: ['$$value', { $strLenBytes: '$$this' }],
              },
            },
          },
        },
      },
    ])

    // next() resolves to null when the doc does not exist; read the field
    // defensively so that case raises the descriptive error below instead of
    // a destructuring TypeError.
    const result = await cursor.next()
    const lineSizeInBytes = result?.lineSizeInBytes

    if (!Number.isFinite(lineSizeInBytes)) {
      throw new Error(`Unable to fetch 'lineSizeInBytes' for docId=${docId}`)
    }

    totalDocSize += lineSizeInBytes
  }

  return totalDocSize
}
|
||||
|
||||
// Script entry point: wait for the database connection, process every project
// ID read from stdin, then terminate the process explicitly - exit code 0 on
// success, 1 if anything rejected along the way.
async function main() {
  try {
    await waitForDb()
    await countProjectFiles()
  } catch (err) {
    console.log('Aiee, something went wrong!', err)
    process.exit(1)
  }

  process.exit(0)
}

main()
|
Loading…
Reference in a new issue