mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-29 06:03:42 -05:00
Merge pull request #12071 from overleaf/bg-track-changes-debug-script
track-changes debug script for history migration GitOrigin-RevId: c95b3b4e3bdfd7c75751caae8ff37838e4a78511
This commit is contained in:
parent
f7131b720b
commit
b7527ab6fe
1 changed files with 311 additions and 0 deletions
311
services/web/scripts/history/debug_history.js
Normal file
311
services/web/scripts/history/debug_history.js
Normal file
|
@ -0,0 +1,311 @@
|
||||||
|
// Script to debug the track-changes history of the documents in a project.
|
||||||
|
// Usage:
|
||||||
|
// node debug_history.js --project-id=<project_id>
|
||||||
|
//
|
||||||
|
// Example output:
|
||||||
|
// $ node scripts/debug_history.js --project-id=63ff3adc06177192f18a6b38
|
||||||
|
// Using default settings from /overleaf/services/track-changes/config/settings.defaults.js
|
||||||
|
// Set UV_THREADPOOL_SIZE=16
|
||||||
|
// project 63ff3adc06177192f18a6b38 docId 63ff3adc06177192f18a6b3d OK
|
||||||
|
// project 63ff3adc06177192f18a6b38 docId 63ff3b08de41e3b0989c1720 FAILED
|
||||||
|
// {"action":"rewinding","version":7,"meta":{"start_ts":1677671465447,"end_ts":1677671465447,"user_id":"632ae106f9a6dd002505765b"},
|
||||||
|
// "ops":[{"action":"rewindOp","contentLength":24,"op":{"p":32,"d":6},"errors":[{"message":"invalid offset rewinding delete,
|
||||||
|
// truncating to content length","op":{"p":32,"d":6},"contentLength":24}]}],"status":"failed"}
|
||||||
|
|
||||||
|
/* eslint-disable camelcase */
|
||||||
|
const { waitForDb } = require('../../../track-changes/app/js/mongodb')
|
||||||
|
const PackManager = require('../../../track-changes/app/js/PackManager')
|
||||||
|
const {
|
||||||
|
packsAreDuplicated,
|
||||||
|
} = require('../../../track-changes/app/js/util/PackUtils')
|
||||||
|
const {
|
||||||
|
ConsistencyError,
|
||||||
|
} = require('../../../track-changes/app/js/DiffGenerator')
|
||||||
|
const DocumentUpdaterManager = require('../../../track-changes/app/js/DocumentUpdaterManager')
|
||||||
|
const DocstoreManager = require('../../../track-changes/app/js/DocstoreManager')
|
||||||
|
const Errors = require('../../../track-changes/app/js/Errors')
|
||||||
|
const minimist = require('minimist')
|
||||||
|
const util = require('util')
|
||||||
|
const logger = require('@overleaf/logger')
|
||||||
|
logger.initialize('debug-history')
|
||||||
|
// disable logging to stdout from internal modules
|
||||||
|
logger.logger.streams = []
|
||||||
|
|
||||||
|
const options = {
|
||||||
|
boolean: ['verbose', 'raw', 'help'],
|
||||||
|
string: ['project-id'],
|
||||||
|
alias: {
|
||||||
|
'project-id': 'p',
|
||||||
|
verbose: 'v',
|
||||||
|
raw: 'r',
|
||||||
|
help: 'h',
|
||||||
|
},
|
||||||
|
default: {},
|
||||||
|
}
|
||||||
|
const argv = minimist(process.argv.slice(2), options)
|
||||||
|
|
||||||
|
function usage() {
|
||||||
|
console.log(
|
||||||
|
`Usage: ${process.argv[1]} [--project-id=<project_id>] [--verbose] [--raw]`
|
||||||
|
)
|
||||||
|
process.exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// look in docstore or docupdater for the latest version of the document
|
||||||
|
async function getLatestContent(projectId, docId, lastUpdateVersion) {
|
||||||
|
const [docstoreContent, docstoreVersion] =
|
||||||
|
await DocstoreManager.promises.peekDocument(projectId, docId)
|
||||||
|
|
||||||
|
// if docstore is out of date, check for a newer version in docupdater
|
||||||
|
// and return that instead
|
||||||
|
if (docstoreVersion <= lastUpdateVersion) {
|
||||||
|
const [docupdaterContent, docupdaterVersion] =
|
||||||
|
await DocumentUpdaterManager.promises.peekDocument(projectId, docId)
|
||||||
|
if (docupdaterVersion > docstoreVersion) {
|
||||||
|
return [docupdaterContent, docupdaterVersion]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [docstoreContent, docstoreVersion]
|
||||||
|
}
|
||||||
|
|
||||||
|
// This class is used to write a record of all the operations that have been applied to a document
|
||||||
|
class LogAppliedOps {
|
||||||
|
constructor() {
|
||||||
|
this.result = []
|
||||||
|
}
|
||||||
|
|
||||||
|
// used to log the initial state of the document
|
||||||
|
start(action, latestContent, version) {
|
||||||
|
this.result.push({
|
||||||
|
action,
|
||||||
|
latestContentLength: latestContent.length,
|
||||||
|
latestContent: argv.raw ? latestContent : undefined,
|
||||||
|
version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// used to log a new document update
|
||||||
|
update(action, update) {
|
||||||
|
this._finalize()
|
||||||
|
this.opResults = []
|
||||||
|
this.currentResult = {
|
||||||
|
action,
|
||||||
|
version: update.v,
|
||||||
|
meta: update.meta,
|
||||||
|
ops: this.opResults,
|
||||||
|
}
|
||||||
|
this.result.push(this.currentResult)
|
||||||
|
}
|
||||||
|
|
||||||
|
// used to log an operation that has been applied to the document
|
||||||
|
op(action, content, op) {
|
||||||
|
this.currentOp = {
|
||||||
|
action,
|
||||||
|
contentLength: content.length,
|
||||||
|
content: argv.raw ? content : undefined,
|
||||||
|
op: this._filterOp(op),
|
||||||
|
}
|
||||||
|
this.opResults.push(this.currentOp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// used to log an error that occurred while applying an operation
|
||||||
|
opError(message, content, op, err) {
|
||||||
|
this.currentOp.errors = this.currentOp.errors || []
|
||||||
|
this.currentOp.errors.push({
|
||||||
|
message,
|
||||||
|
op: this._filterOp(op),
|
||||||
|
contentLength: content.length,
|
||||||
|
content: argv.raw ? content : undefined,
|
||||||
|
err,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// sets the status of the current update to 'success' or 'failed'
|
||||||
|
// depending on whether any errors were logged
|
||||||
|
_finalize() {
|
||||||
|
if (!this.currentResult) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
const errors = this.opResults.some(op => op.errors)
|
||||||
|
this.currentResult.status = errors ? 'failed' : 'success'
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns the final result of the log
|
||||||
|
end() {
|
||||||
|
this._finalize()
|
||||||
|
return this.result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a new object with the same keys as op, but with the i and d
|
||||||
|
// fields replaced by their lengths when present. This is used to filter
|
||||||
|
// out the contents of the i and d fields of an operation, to redact
|
||||||
|
// document content.
|
||||||
|
_filterOp(op) {
|
||||||
|
const newOp = {}
|
||||||
|
for (const key of Object.keys(op)) {
|
||||||
|
if (!argv.raw && (key === 'i' || key === 'd')) {
|
||||||
|
newOp[key] = op[key].length
|
||||||
|
} else {
|
||||||
|
newOp[key] = op[key]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return newOp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is the rewindOp function from track-changes, modified to log
|
||||||
|
// the operation and any errors.
|
||||||
|
function rewindOp(content, op, log) {
|
||||||
|
if (op.i != null) {
|
||||||
|
// ShareJS will accept an op where p > content.length when applied,
|
||||||
|
// and it applies as though p == content.length. However, the op is
|
||||||
|
// passed to us with the original p > content.length. Detect if that
|
||||||
|
// is the case with this op, and shift p back appropriately to match
|
||||||
|
// ShareJS if so.
|
||||||
|
let { p } = op
|
||||||
|
const max_p = content.length - op.i.length
|
||||||
|
if (p > max_p) {
|
||||||
|
log.opError(
|
||||||
|
'invalid offset rewinding insert, truncating to content length',
|
||||||
|
content,
|
||||||
|
op
|
||||||
|
)
|
||||||
|
p = max_p
|
||||||
|
}
|
||||||
|
const textToBeRemoved = content.slice(p, p + op.i.length)
|
||||||
|
if (op.i !== textToBeRemoved) {
|
||||||
|
log.opError(
|
||||||
|
'inserted content does not match text to be removed',
|
||||||
|
content,
|
||||||
|
op
|
||||||
|
)
|
||||||
|
throw new ConsistencyError(
|
||||||
|
`Inserted content, '${op.i}', does not match text to be removed, '${textToBeRemoved}'`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return content.slice(0, p) + content.slice(p + op.i.length)
|
||||||
|
} else if (op.d != null) {
|
||||||
|
if (op.p > content.length) {
|
||||||
|
log.opError(
|
||||||
|
'invalid offset rewinding delete, truncating to content length',
|
||||||
|
content,
|
||||||
|
op
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return content.slice(0, op.p) + op.d + content.slice(op.p)
|
||||||
|
} else {
|
||||||
|
return content
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is the rewindDoc function from track-changes, modified to log all
|
||||||
|
// operations that are applied to the document.
|
||||||
|
async function rewindDoc(projectId, docId) {
|
||||||
|
const log = new LogAppliedOps()
|
||||||
|
// Prepare to rewind content
|
||||||
|
const docIterator = await PackManager.promises.makeDocIterator(docId)
|
||||||
|
const getUpdate = util.promisify(docIterator.next).bind(docIterator)
|
||||||
|
|
||||||
|
const lastUpdate = await getUpdate()
|
||||||
|
if (!lastUpdate) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const lastUpdateVersion = lastUpdate.v
|
||||||
|
|
||||||
|
let latestContent
|
||||||
|
let version
|
||||||
|
try {
|
||||||
|
;[latestContent, version] = await getLatestContent(
|
||||||
|
projectId,
|
||||||
|
docId,
|
||||||
|
lastUpdateVersion
|
||||||
|
)
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof Errors.NotFoundError) {
|
||||||
|
// Doc not found in docstore. We can't build its history
|
||||||
|
return null
|
||||||
|
} else {
|
||||||
|
throw err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.start('load-doc', latestContent, version)
|
||||||
|
|
||||||
|
let content = latestContent
|
||||||
|
let update = lastUpdate
|
||||||
|
let previousUpdate = null
|
||||||
|
|
||||||
|
while (update) {
|
||||||
|
if (packsAreDuplicated(update, previousUpdate)) {
|
||||||
|
previousUpdate = update
|
||||||
|
update = await getUpdate()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
log.update('rewinding', update)
|
||||||
|
for (let i = update.op.length - 1; i >= 0; i--) {
|
||||||
|
const op = update.op[i]
|
||||||
|
if (op.broken === true) {
|
||||||
|
log.op('skipped', op)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
log.op('rewindOp', content, op)
|
||||||
|
content = rewindOp(content, op, log)
|
||||||
|
} catch (e) {
|
||||||
|
if (e instanceof ConsistencyError && (i = update.op.length - 1)) {
|
||||||
|
// catch known case where the last op in an array has been
|
||||||
|
// merged into a later op
|
||||||
|
op.broken = true
|
||||||
|
log.opError('marking broken', content, op)
|
||||||
|
} else {
|
||||||
|
log.opError('failed', content, op, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
previousUpdate = update
|
||||||
|
update = await getUpdate()
|
||||||
|
}
|
||||||
|
return log.end()
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const projectId = argv['project-id']
|
||||||
|
if (!projectId || argv.help) {
|
||||||
|
usage()
|
||||||
|
process.exit(1)
|
||||||
|
}
|
||||||
|
const docIds = await PackManager.promises.findAllDocsInProject(projectId)
|
||||||
|
if (!docIds.length) {
|
||||||
|
console.log('No docs found for project', projectId)
|
||||||
|
process.exit(0)
|
||||||
|
}
|
||||||
|
let errorCount = 0
|
||||||
|
for (const docId of docIds) {
|
||||||
|
const result = await rewindDoc(projectId, docId)
|
||||||
|
const failed = result.filter(r => r.status === 'failed')
|
||||||
|
errorCount += failed.length
|
||||||
|
if (argv.verbose) {
|
||||||
|
console.log(JSON.stringify({ projectId, docId, result }, null, 2))
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
'project',
|
||||||
|
projectId,
|
||||||
|
'docId',
|
||||||
|
docId,
|
||||||
|
failed.length === 0 ? 'OK' : 'FAILED'
|
||||||
|
)
|
||||||
|
for (const f of failed) {
|
||||||
|
console.log(JSON.stringify(f))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
process.exit(errorCount > 0 ? 1 : 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
waitForDb()
|
||||||
|
.then(main)
|
||||||
|
.catch(err => {
|
||||||
|
console.error(err)
|
||||||
|
process.exit(1)
|
||||||
|
})
|
Loading…
Reference in a new issue