const logger = require('@overleaf/logger')
const UpdatesManager = require('./UpdatesManager')
const DiffGenerator = require('./DiffGenerator')
const DocumentUpdaterManager = require('./DocumentUpdaterManager')
const DocstoreManager = require('./DocstoreManager')
const Errors = require('./Errors')
const PackManager = require('./PackManager')
const yazl = require('yazl')
const util = require('util')
const stream = require('stream')
const fs = require('fs')
const os = require('os')
const Path = require('path')
const { packsAreDuplicated } = require('./util/PackUtils')

const streamPipeline = util.promisify(stream.pipeline)

// look in docstore or docupdater for the latest version of the document
async function getLatestContent(projectId, docId, lastUpdateVersion) {
  const [docstoreContent, docstoreVersion] =
    await DocstoreManager.promises.peekDocument(projectId, docId)

  // if docstore is out of date, check for a newer version in docupdater
  // and return that instead
  if (docstoreVersion <= lastUpdateVersion) {
    const [docupdaterContent, docupdaterVersion] =
      await DocumentUpdaterManager.promises.peekDocument(projectId, docId)
    if (docupdaterVersion > docstoreVersion) {
      return [docupdaterContent, docupdaterVersion]
    }
  }

  return [docstoreContent, docstoreVersion]
}
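
/**
 * Rewind a single document's history into the zipfile: store the latest
 * content, undo each update in turn (newest first) and store every update
 * that applies cleanly, then store the oldest content reached. Returns
 * metadata describing the zip entries, or null if the doc has no updates
 * or its latest content cannot be found.
 */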
async function rewindDoc(projectId, docId, zipfile) {
  logger.debug({ projectId, docId }, 'rewinding document')

  // Prepare to rewind content

  const docIterator = await PackManager.promises.makeDocIterator(docId)

  const getUpdate = util.promisify(docIterator.next).bind(docIterator)

  const lastUpdate = await getUpdate()
  if (!lastUpdate) {
    return null
  }

  const lastUpdateVersion = lastUpdate.v

  let latestContent
  let version
  try {
    ;[latestContent, version] = await getLatestContent(
      projectId,
      docId,
      lastUpdateVersion
    )
  } catch (err) {
    if (err instanceof Errors.NotFoundError) {
      // Doc not found in docstore. We can't build its history
      return null
    } else {
      throw err
    }
  }

  const id = docId.toString()

  const contentEndPath = `${id}/content/end/${version}`
  zipfile.addBuffer(Buffer.from(latestContent), contentEndPath)

  const metadata = {
    id,
    version,
    content: {
      end: {
        path: contentEndPath,
        version,
      },
    },
    updates: [],
  }

  let content = latestContent
  let v = version
  let update = lastUpdate
  let previousUpdate = null

  // rewind through the history, newest update first, undoing each in turn
  while (update) {
    // skip packs that duplicate the previous one
    if (packsAreDuplicated(update, previousUpdate)) {
      previousUpdate = update
      update = await getUpdate()
      continue
    }

    const updatePath = `${id}/updates/${update.v}`

    try {
      content = DiffGenerator.rewindUpdate(content, update)
      // filter out any known "broken ops" as these may be recoverable
      update.op = update.op.filter(op => !op.broken)
      // only store the update in the zip file when we have applied it
      // successfully, and after filtering out broken ops.
      zipfile.addBuffer(Buffer.from(JSON.stringify(update)), updatePath, {
        mtime: new Date(update.meta.start_ts),
      })
      v = update.v
    } catch (e) {
      e.attempted_update = update // keep a record of the attempted update
      logger.warn({ projectId, docId, err: e }, 'rewind error')
      break // stop attempting to rewind on error
    }

    metadata.updates.push({
      path: updatePath,
      version: update.v,
      ts: update.meta.start_ts,
      doc_length: content.length,
    })
    previousUpdate = update
    update = await getUpdate()
  }

  const contentStartPath = `${id}/content/start/${v}`
  zipfile.addBuffer(Buffer.from(content), contentStartPath)

  metadata.content.start = {
    path: contentStartPath,
    version: v,
  }

  return metadata
}
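
/**
 * Build the zip contents for a project: a rewound history for every doc,
 * plus a manifest.json describing the entries and a README.txt explaining
 * why the archive exists.
 */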
async function generateZip(projectId, zipfile) {
  // make sure any raw updates have been compressed into packs first
  await UpdatesManager.promises.processUncompressedUpdatesForProject(projectId)
  const docIds = await PackManager.promises.findAllDocsInProject(projectId)
  const manifest = { projectId, docs: [] }
  for (const docId of docIds) {
    const doc = await rewindDoc(projectId, docId, zipfile)
    if (doc) {
      manifest.docs.push(doc)
    }
  }
  zipfile.addBuffer(
    Buffer.from(JSON.stringify(manifest, null, 2)),
    'manifest.json'
  )
  zipfile.addBuffer(
    Buffer.from(
      `History Migration Data

This zip file contains a copy of the raw history for this project.

If this file is present in a project it means the history could not
be fully recovered or migrated.

A new history should have been created starting at the datestamp of
this file.`
    ),
    'README.txt'
  )
  zipfile.end()
}
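
/**
 * Export the raw history for a project to a zip file on disk at zipPath
 */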
async function exportProject(projectId, zipPath) {
  const zipfile = new yazl.ZipFile()
  // start streaming the zip to disk now, so the output is consumed while
  // generateZip is still adding entries
  const pipeline = streamPipeline(
    zipfile.outputStream,
    fs.createWriteStream(zipPath)
  )
  await generateZip(projectId, zipfile)
  await pipeline
}

/**
 * Create a temporary directory for use with exportProject()
 */
async function makeTempDirectory() {
  const tmpdir = await fs.promises.mkdtemp(
    (await fs.promises.realpath(os.tmpdir())) + Path.sep
  )
  return tmpdir
}

/**
 * Clean up a temporary directory made with makeTempDirectory()
 */
function cleanupTempDirectory(tmpdir) {
  // fire-and-forget: failures are logged but not propagated to the caller
  fs.promises.rm(tmpdir, { recursive: true, force: true }).catch(err => {
    logger.warn({ err, tmpdir }, 'Failed to clean up temp directory')
  })
}

module.exports = {
  exportProject: util.callbackify(exportProject),
  makeTempDirectory: util.callbackify(makeTempDirectory),
  cleanupTempDirectory,
}
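
// Example usage (an illustrative sketch, not part of this module): the
// exported functions are callback-style via util.callbackify, so callers
// can pass a callback or re-promisify them. `ZipManager` is an assumed
// name for this module; adjust the require path to match its location.
//
//   const util = require('util')
//   const Path = require('path')
//   const ZipManager = require('./ZipManager')
//
//   const makeTempDirectory = util.promisify(ZipManager.makeTempDirectory)
//   const exportProject = util.promisify(ZipManager.exportProject)
//
//   async function exportToTempZip(projectId) {
//     const tmpdir = await makeTempDirectory()
//     const zipPath = Path.join(tmpdir, `${projectId}.zip`)
//     try {
//       await exportProject(projectId, zipPath)
//       // consume zipPath here (e.g. upload it) before cleanup
//     } finally {
//       ZipManager.cleanupTempDirectory(tmpdir)
//     }
//   }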