decaffeinate: Convert DiffGenerator.coffee and 17 other files to JS

decaffeinate 2020-02-17 18:34:04 +01:00 committed by mserranom
parent a971c5895b
commit 57345632e0
18 changed files with 2834 additions and 2000 deletions


@@ -1,227 +1,293 @@
ConsistencyError = (message) ->
error = new Error(message)
error.name = "ConsistencyError"
error.__proto__ = ConsistencyError.prototype
return error
ConsistencyError.prototype.__proto__ = Error.prototype
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DiffGenerator;
var ConsistencyError = function(message) {
const error = new Error(message);
error.name = "ConsistencyError";
error.__proto__ = ConsistencyError.prototype;
return error;
};
ConsistencyError.prototype.__proto__ = Error.prototype;
logger = require "logger-sharelatex"
const logger = require("logger-sharelatex");
module.exports = DiffGenerator =
ConsistencyError: ConsistencyError
module.exports = (DiffGenerator = {
ConsistencyError,
rewindUpdate: (content, update) ->
for op, i in update.op by -1 when op.broken isnt true
try
content = DiffGenerator.rewindOp content, op
catch e
if e instanceof ConsistencyError and i = update.op.length - 1
# catch known case where the last op in an array has been
# merged into a later op
logger.error {err: e, update, op: JSON.stringify(op)}, "marking op as broken"
op.broken = true
else
throw e # rethrow the exception
return content
rewindUpdate(content, update) {
for (let j = update.op.length - 1, i = j; j >= 0; j--, i = j) {
const op = update.op[i];
if (op.broken !== true) {
try {
content = DiffGenerator.rewindOp(content, op);
} catch (e) {
if (e instanceof ConsistencyError && (i = update.op.length - 1)) {
// catch known case where the last op in an array has been
// merged into a later op
logger.error({err: e, update, op: JSON.stringify(op)}, "marking op as broken");
op.broken = true;
} else {
throw e; // rethrow the exception
}
}
}
}
return content;
},
rewindOp: (content, op) ->
if op.i?
# ShareJS will accept an op where p > content.length when applied,
# and it applies as though p == content.length. However, the op is
# passed to us with the original p > content.length. Detect if that
# is the case with this op, and shift p back appropriately to match
# ShareJS if so.
p = op.p
max_p = content.length - op.i.length
if p > max_p
logger.warn {max_p, p}, "truncating position to content length"
p = max_p
rewindOp(content, op) {
let p;
if (op.i != null) {
// ShareJS will accept an op where p > content.length when applied,
// and it applies as though p == content.length. However, the op is
// passed to us with the original p > content.length. Detect if that
// is the case with this op, and shift p back appropriately to match
// ShareJS if so.
({ p } = op);
const max_p = content.length - op.i.length;
if (p > max_p) {
logger.warn({max_p, p}, "truncating position to content length");
p = max_p;
}
textToBeRemoved = content.slice(p, p + op.i.length)
if op.i != textToBeRemoved
const textToBeRemoved = content.slice(p, p + op.i.length);
if (op.i !== textToBeRemoved) {
throw new ConsistencyError(
"Inserted content, '#{op.i}', does not match text to be removed, '#{textToBeRemoved}'"
)
return content.slice(0, p) + content.slice(p + op.i.length)
else if op.d?
return content.slice(0, op.p) + op.d + content.slice(op.p)
else
return content
rewindUpdates: (content, updates) ->
for update in updates.reverse()
try
content = DiffGenerator.rewindUpdate(content, update)
catch e
e.attempted_update = update # keep a record of the attempted update
throw e # rethrow the exception
return content
buildDiff: (initialContent, updates) ->
diff = [ u: initialContent ]
for update in updates
diff = DiffGenerator.applyUpdateToDiff diff, update
diff = DiffGenerator.compressDiff diff
return diff
compressDiff: (diff) ->
newDiff = []
for part in diff
lastPart = newDiff[newDiff.length - 1]
if lastPart? and lastPart.meta?.user? and part.meta?.user?
if lastPart.i? and part.i? and lastPart.meta.user.id == part.meta.user.id
lastPart.i += part.i
lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
else if lastPart.d? and part.d? and lastPart.meta.user.id == part.meta.user.id
lastPart.d += part.d
lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
else
newDiff.push part
else
newDiff.push part
return newDiff
applyOpToDiff: (diff, op, meta) ->
position = 0
remainingDiff = diff.slice()
{consumedDiff, remainingDiff} = DiffGenerator._consumeToOffset(remainingDiff, op.p)
newDiff = consumedDiff
if op.i?
newDiff.push
i: op.i
meta: meta
else if op.d?
{consumedDiff, remainingDiff} = DiffGenerator._consumeDiffAffectedByDeleteOp remainingDiff, op, meta
newDiff.push(consumedDiff...)
newDiff.push(remainingDiff...)
return newDiff
applyUpdateToDiff: (diff, update) ->
for op in update.op when op.broken isnt true
diff = DiffGenerator.applyOpToDiff diff, op, update.meta
return diff
_consumeToOffset: (remainingDiff, totalOffset) ->
consumedDiff = []
position = 0
while part = remainingDiff.shift()
length = DiffGenerator._getLengthOfDiffPart part
if part.d?
consumedDiff.push part
else if position + length >= totalOffset
partOffset = totalOffset - position
if partOffset > 0
consumedDiff.push DiffGenerator._slicePart part, 0, partOffset
if partOffset < length
remainingDiff.unshift DiffGenerator._slicePart part, partOffset
break
else
position += length
consumedDiff.push part
return {
consumedDiff: consumedDiff
remainingDiff: remainingDiff
`Inserted content, '${op.i}', does not match text to be removed, '${textToBeRemoved}'`
);
}
_consumeDiffAffectedByDeleteOp: (remainingDiff, deleteOp, meta) ->
consumedDiff = []
remainingOp = deleteOp
while remainingOp and remainingDiff.length > 0
{newPart, remainingDiff, remainingOp} = DiffGenerator._consumeDeletedPart remainingDiff, remainingOp, meta
consumedDiff.push newPart if newPart?
return {
consumedDiff: consumedDiff
remainingDiff: remainingDiff
return content.slice(0, p) + content.slice(p + op.i.length);
} else if (op.d != null) {
return content.slice(0, op.p) + op.d + content.slice(op.p);
} else {
return content;
}
},
rewindUpdates(content, updates) {
for (let update of Array.from(updates.reverse())) {
try {
content = DiffGenerator.rewindUpdate(content, update);
} catch (e) {
e.attempted_update = update; // keep a record of the attempted update
throw e; // rethrow the exception
}
}
return content;
},
buildDiff(initialContent, updates) {
let diff = [ {u: initialContent} ];
for (let update of Array.from(updates)) {
diff = DiffGenerator.applyUpdateToDiff(diff, update);
}
diff = DiffGenerator.compressDiff(diff);
return diff;
},
compressDiff(diff) {
const newDiff = [];
for (let part of Array.from(diff)) {
const lastPart = newDiff[newDiff.length - 1];
if ((lastPart != null) && ((lastPart.meta != null ? lastPart.meta.user : undefined) != null) && ((part.meta != null ? part.meta.user : undefined) != null)) {
if ((lastPart.i != null) && (part.i != null) && (lastPart.meta.user.id === part.meta.user.id)) {
lastPart.i += part.i;
lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
} else if ((lastPart.d != null) && (part.d != null) && (lastPart.meta.user.id === part.meta.user.id)) {
lastPart.d += part.d;
lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
} else {
newDiff.push(part);
}
} else {
newDiff.push(part);
}
}
return newDiff;
},
applyOpToDiff(diff, op, meta) {
let consumedDiff;
const position = 0;
let remainingDiff = diff.slice();
({consumedDiff, remainingDiff} = DiffGenerator._consumeToOffset(remainingDiff, op.p));
const newDiff = consumedDiff;
if (op.i != null) {
newDiff.push({
i: op.i,
meta
});
} else if (op.d != null) {
({consumedDiff, remainingDiff} = DiffGenerator._consumeDiffAffectedByDeleteOp(remainingDiff, op, meta));
newDiff.push(...Array.from(consumedDiff || []));
}
_consumeDeletedPart: (remainingDiff, op, meta) ->
part = remainingDiff.shift()
partLength = DiffGenerator._getLengthOfDiffPart part
newDiff.push(...Array.from(remainingDiff || []));
if part.d?
# Skip existing deletes
remainingOp = op
newPart = part
return newDiff;
},
else if partLength > op.d.length
# Only the first bit of the part has been deleted
remainingPart = DiffGenerator._slicePart part, op.d.length
remainingDiff.unshift remainingPart
applyUpdateToDiff(diff, update) {
for (let op of Array.from(update.op)) {
if (op.broken !== true) {
diff = DiffGenerator.applyOpToDiff(diff, op, update.meta);
}
}
return diff;
},
deletedContent = DiffGenerator._getContentOfPart(part).slice(0, op.d.length)
if deletedContent != op.d
throw new ConsistencyError("deleted content, '#{deletedContent}', does not match delete op, '#{op.d}'")
_consumeToOffset(remainingDiff, totalOffset) {
let part;
const consumedDiff = [];
let position = 0;
while ((part = remainingDiff.shift())) {
const length = DiffGenerator._getLengthOfDiffPart(part);
if (part.d != null) {
consumedDiff.push(part);
} else if ((position + length) >= totalOffset) {
const partOffset = totalOffset - position;
if (partOffset > 0) {
consumedDiff.push(DiffGenerator._slicePart(part, 0, partOffset));
}
if (partOffset < length) {
remainingDiff.unshift(DiffGenerator._slicePart(part, partOffset));
}
break;
} else {
position += length;
consumedDiff.push(part);
}
}
if part.u?
newPart =
d: op.d
meta: meta
else if part.i?
newPart = null
return {
consumedDiff,
remainingDiff
};
},
remainingOp = null
_consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
const consumedDiff = [];
let remainingOp = deleteOp;
while (remainingOp && (remainingDiff.length > 0)) {
let newPart;
({newPart, remainingDiff, remainingOp} = DiffGenerator._consumeDeletedPart(remainingDiff, remainingOp, meta));
if (newPart != null) { consumedDiff.push(newPart); }
}
return {
consumedDiff,
remainingDiff
};
},
else if partLength == op.d.length
# The entire part has been deleted, but it is the last part
_consumeDeletedPart(remainingDiff, op, meta) {
let deletedContent, newPart, remainingOp;
const part = remainingDiff.shift();
const partLength = DiffGenerator._getLengthOfDiffPart(part);
deletedContent = DiffGenerator._getContentOfPart(part)
if deletedContent != op.d
throw new ConsistencyError("deleted content, '#{deletedContent}', does not match delete op, '#{op.d}'")
if (part.d != null) {
// Skip existing deletes
remainingOp = op;
newPart = part;
if part.u?
newPart =
d: op.d
meta: meta
else if part.i?
newPart = null
} else if (partLength > op.d.length) {
// Only the first bit of the part has been deleted
const remainingPart = DiffGenerator._slicePart(part, op.d.length);
remainingDiff.unshift(remainingPart);
remainingOp = null
deletedContent = DiffGenerator._getContentOfPart(part).slice(0, op.d.length);
if (deletedContent !== op.d) {
throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
}
else if partLength < op.d.length
# The entire part has been deleted and there is more
if (part.u != null) {
newPart = {
d: op.d,
meta
};
} else if (part.i != null) {
newPart = null;
}
deletedContent = DiffGenerator._getContentOfPart(part)
opContent = op.d.slice(0, deletedContent.length)
if deletedContent != opContent
throw new ConsistencyError("deleted content, '#{deletedContent}', does not match delete op, '#{opContent}'")
remainingOp = null;
if part.u
newPart =
d: part.u
meta: meta
else if part.i?
newPart = null
} else if (partLength === op.d.length) {
// The entire part has been deleted, but it is the last part
deletedContent = DiffGenerator._getContentOfPart(part);
if (deletedContent !== op.d) {
throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
}
if (part.u != null) {
newPart = {
d: op.d,
meta
};
} else if (part.i != null) {
newPart = null;
}
remainingOp = null;
} else if (partLength < op.d.length) {
// The entire part has been deleted and there is more
deletedContent = DiffGenerator._getContentOfPart(part);
const opContent = op.d.slice(0, deletedContent.length);
if (deletedContent !== opContent) {
throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`);
}
if (part.u) {
newPart = {
d: part.u,
meta
};
} else if (part.i != null) {
newPart = null;
}
remainingOp =
p: op.p, d: op.d.slice(DiffGenerator._getLengthOfDiffPart(part))
return {
newPart: newPart
remainingDiff: remainingDiff
remainingOp: remainingOp
{p: op.p, d: op.d.slice(DiffGenerator._getLengthOfDiffPart(part))};
}
_slicePart: (basePart, from, to) ->
if basePart.u?
part = { u: basePart.u.slice(from, to) }
else if basePart.i?
part = { i: basePart.i.slice(from, to) }
if basePart.meta?
part.meta = basePart.meta
return part
return {
newPart,
remainingDiff,
remainingOp
};
},
_getLengthOfDiffPart: (part) ->
(part.u or part.d or part.i or '').length
_slicePart(basePart, from, to) {
let part;
if (basePart.u != null) {
part = { u: basePart.u.slice(from, to) };
} else if (basePart.i != null) {
part = { i: basePart.i.slice(from, to) };
}
if (basePart.meta != null) {
part.meta = basePart.meta;
}
return part;
},
_getContentOfPart: (part) ->
part.u or part.d or part.i or ''
_getLengthOfDiffPart(part) {
return (part.u || part.d || part.i || '').length;
},
_getContentOfPart(part) {
return part.u || part.d || part.i || '';
}
});
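A minimal usage sketch of the converted DiffGenerator (the require path and the sample update are illustrative assumptions, not taken from this commit):

const DiffGenerator = require("./app/js/DiffGenerator");

// an update carries ShareJS-style ops plus user metadata, in the shape the code above consumes
const update = {
  op: [{ i: "world", p: 6 }],                                     // insert "world" at position 6
  meta: { user: { id: "user-1" }, start_ts: 1000, end_ts: 2000 },
  v: 42
};

// buildDiff annotates the initial content with the inserts/deletes from the updates
const diff = DiffGenerator.buildDiff("Hello !", [update]);
// => [ { u: "Hello " }, { i: "world", meta: { ... } }, { u: "!" } ]

// rewindUpdate undoes one update, recovering the earlier content
const before = DiffGenerator.rewindUpdate("Hello world!", update);
// => "Hello !"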


@@ -1,88 +1,128 @@
UpdatesManager = require "./UpdatesManager"
DocumentUpdaterManager = require "./DocumentUpdaterManager"
DiffGenerator = require "./DiffGenerator"
logger = require "logger-sharelatex"
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DiffManager;
const UpdatesManager = require("./UpdatesManager");
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffGenerator = require("./DiffGenerator");
const logger = require("logger-sharelatex");
module.exports = DiffManager =
getLatestDocAndUpdates: (project_id, doc_id, fromVersion, callback = (error, content, version, updates) ->) ->
# Get updates last, since then they must be ahead and it
# might be possible to rewind to the same version as the doc.
DocumentUpdaterManager.getDocument project_id, doc_id, (error, content, version) ->
return callback(error) if error?
if !fromVersion? # If we haven't been given a version, just return latest doc and no updates
return callback(null, content, version, [])
UpdatesManager.getDocUpdatesWithUserInfo project_id, doc_id, from: fromVersion, (error, updates) ->
return callback(error) if error?
callback(null, content, version, updates)
module.exports = (DiffManager = {
getLatestDocAndUpdates(project_id, doc_id, fromVersion, callback) {
// Get updates last, since then they must be ahead and it
// might be possible to rewind to the same version as the doc.
if (callback == null) { callback = function(error, content, version, updates) {}; }
return DocumentUpdaterManager.getDocument(project_id, doc_id, function(error, content, version) {
if (error != null) { return callback(error); }
if ((fromVersion == null)) { // If we haven't been given a version, just return latest doc and no updates
return callback(null, content, version, []);
}
return UpdatesManager.getDocUpdatesWithUserInfo(project_id, doc_id, {from: fromVersion}, function(error, updates) {
if (error != null) { return callback(error); }
return callback(null, content, version, updates);
});
});
},
getDiff: (project_id, doc_id, fromVersion, toVersion, callback = (error, diff) ->) ->
DiffManager.getDocumentBeforeVersion project_id, doc_id, fromVersion, (error, startingContent, updates) ->
if error?
if error.message == "broken-history"
return callback(null, "history unavailable")
else
return callback(error)
getDiff(project_id, doc_id, fromVersion, toVersion, callback) {
if (callback == null) { callback = function(error, diff) {}; }
return DiffManager.getDocumentBeforeVersion(project_id, doc_id, fromVersion, function(error, startingContent, updates) {
let diff;
if (error != null) {
if (error.message === "broken-history") {
return callback(null, "history unavailable");
} else {
return callback(error);
}
}
updatesToApply = []
for update in updates.slice().reverse()
if update.v <= toVersion
updatesToApply.push update
const updatesToApply = [];
for (let update of Array.from(updates.slice().reverse())) {
if (update.v <= toVersion) {
updatesToApply.push(update);
}
}
try
diff = DiffGenerator.buildDiff startingContent, updatesToApply
catch e
return callback(e)
try {
diff = DiffGenerator.buildDiff(startingContent, updatesToApply);
} catch (e) {
return callback(e);
}
callback(null, diff)
return callback(null, diff);
});
},
getDocumentBeforeVersion: (project_id, doc_id, version, _callback = (error, document, rewoundUpdates) ->) ->
# Whichever order we get the latest document and the latest updates,
# there is potential for updates to be applied between them so that
# they do not return the same 'latest' versions.
# If this happens, we just retry and hopefully get them at the compatible
# versions.
retries = 3
callback = (error, args...) ->
if error?
if error.retry and retries > 0
logger.warn {error, project_id, doc_id, version, retries}, "retrying getDocumentBeforeVersion"
retry()
else
_callback(error)
else
_callback(null, args...)
getDocumentBeforeVersion(project_id, doc_id, version, _callback) {
// Whichever order we get the latest document and the latest updates,
// there is potential for updates to be applied between them so that
// they do not return the same 'latest' versions.
// If this happens, we just retry and hopefully get them at the compatible
// versions.
let retry;
if (_callback == null) { _callback = function(error, document, rewoundUpdates) {}; }
let retries = 3;
const callback = function(error, ...args) {
if (error != null) {
if (error.retry && (retries > 0)) {
logger.warn({error, project_id, doc_id, version, retries}, "retrying getDocumentBeforeVersion");
return retry();
} else {
return _callback(error);
}
} else {
return _callback(null, ...Array.from(args));
}
};
do retry = () ->
retries--
DiffManager._tryGetDocumentBeforeVersion(project_id, doc_id, version, callback)
return (retry = function() {
retries--;
return DiffManager._tryGetDocumentBeforeVersion(project_id, doc_id, version, callback);
})();
},
_tryGetDocumentBeforeVersion: (project_id, doc_id, version, callback = (error, document, rewoundUpdates) ->) ->
logger.log project_id: project_id, doc_id: doc_id, version: version, "getting document before version"
DiffManager.getLatestDocAndUpdates project_id, doc_id, version, (error, content, version, updates) ->
return callback(error) if error?
_tryGetDocumentBeforeVersion(project_id, doc_id, version, callback) {
if (callback == null) { callback = function(error, document, rewoundUpdates) {}; }
logger.log({project_id, doc_id, version}, "getting document before version");
return DiffManager.getLatestDocAndUpdates(project_id, doc_id, version, function(error, content, version, updates) {
let startingContent;
if (error != null) { return callback(error); }
# bail out if we hit a broken update
for u in updates when u.broken
return callback new Error "broken-history"
// bail out if we hit a broken update
for (let u of Array.from(updates)) {
if (u.broken) {
return callback(new Error("broken-history"));
}
}
# discard any updates which are ahead of this document version
while updates[0]?.v >= version
updates.shift()
// discard any updates which are ahead of this document version
while ((updates[0] != null ? updates[0].v : undefined) >= version) {
updates.shift();
}
lastUpdate = updates[0]
if lastUpdate? and lastUpdate.v != version - 1
error = new Error("latest update version, #{lastUpdate.v}, does not match doc version, #{version}")
error.retry = true
return callback error
const lastUpdate = updates[0];
if ((lastUpdate != null) && (lastUpdate.v !== (version - 1))) {
error = new Error(`latest update version, ${lastUpdate.v}, does not match doc version, ${version}`);
error.retry = true;
return callback(error);
}
logger.log {docVersion: version, lastUpdateVersion: lastUpdate?.v, updateCount: updates.length}, "rewinding updates"
logger.log({docVersion: version, lastUpdateVersion: (lastUpdate != null ? lastUpdate.v : undefined), updateCount: updates.length}, "rewinding updates");
tryUpdates = updates.slice().reverse()
const tryUpdates = updates.slice().reverse();
try
startingContent = DiffGenerator.rewindUpdates content, tryUpdates
# tryUpdates is reversed, and any unapplied ops are marked as broken
catch e
return callback(e)
try {
startingContent = DiffGenerator.rewindUpdates(content, tryUpdates);
// tryUpdates is reversed, and any unapplied ops are marked as broken
} catch (e) {
return callback(e);
}
callback(null, startingContent, tryUpdates)
return callback(null, startingContent, tryUpdates);
});
}
});
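A sketch of calling the converted DiffManager (ids and the require path are placeholders):

const DiffManager = require("./app/js/DiffManager");

const project_id = "project-id";   // placeholder ids
const doc_id = "doc-id";

// diff of a doc between two versions; if the history is known to be broken
// the callback receives the string "history unavailable" instead of a diff
DiffManager.getDiff(project_id, doc_id, 3, 5, function(error, diff) {
  if (error) { return console.error(error); }
  console.log(diff);
});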


@@ -1,42 +1,63 @@
request = require "request"
logger = require "logger-sharelatex"
Settings = require "settings-sharelatex"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DocumentUpdaterManager;
const request = require("request");
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");
module.exports = DocumentUpdaterManager =
getDocument: (project_id, doc_id, callback = (error, content, version) ->) ->
url = "#{Settings.apis.documentupdater.url}/project/#{project_id}/doc/#{doc_id}"
logger.log project_id:project_id, doc_id: doc_id, "getting doc from document updater"
request.get url, (error, res, body)->
if error?
return callback(error)
if res.statusCode >= 200 and res.statusCode < 300
try
body = JSON.parse(body)
catch error
return callback(error)
logger.log {project_id, doc_id, version: body.version}, "got doc from document updater"
callback null, body.lines.join("\n"), body.version
else
error = new Error("doc updater returned a non-success status code: #{res.statusCode}")
logger.error err: error, project_id:project_id, doc_id:doc_id, url: url, "error accessing doc updater"
callback error
module.exports = (DocumentUpdaterManager = {
getDocument(project_id, doc_id, callback) {
if (callback == null) { callback = function(error, content, version) {}; }
const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
logger.log({project_id, doc_id}, "getting doc from document updater");
return request.get(url, function(error, res, body){
if (error != null) {
return callback(error);
}
if ((res.statusCode >= 200) && (res.statusCode < 300)) {
try {
body = JSON.parse(body);
} catch (error1) {
error = error1;
return callback(error);
}
logger.log({project_id, doc_id, version: body.version}, "got doc from document updater");
return callback(null, body.lines.join("\n"), body.version);
} else {
error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
return callback(error);
}
});
},
setDocument: (project_id, doc_id, content, user_id, callback = (error) ->) ->
url = "#{Settings.apis.documentupdater.url}/project/#{project_id}/doc/#{doc_id}"
logger.log project_id:project_id, doc_id: doc_id, "setting doc in document updater"
request.post {
url: url
json:
lines: content.split("\n")
source: "restore"
user_id: user_id
setDocument(project_id, doc_id, content, user_id, callback) {
if (callback == null) { callback = function(error) {}; }
const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
logger.log({project_id, doc_id}, "setting doc in document updater");
return request.post({
url,
json: {
lines: content.split("\n"),
source: "restore",
user_id,
undoing: true
}, (error, res, body)->
if error?
return callback(error)
if res.statusCode >= 200 and res.statusCode < 300
callback null
else
error = new Error("doc updater returned a non-success status code: #{res.statusCode}")
logger.error err: error, project_id:project_id, doc_id:doc_id, url: url, "error accessing doc updater"
callback error
}
}, function(error, res, body){
if (error != null) {
return callback(error);
}
if ((res.statusCode >= 200) && (res.statusCode < 300)) {
return callback(null);
} else {
error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
return callback(error);
}
});
}
});
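A sketch of the two DocumentUpdaterManager calls above (placeholder ids, assumed require path):

const DocumentUpdaterManager = require("./app/js/DocumentUpdaterManager");

// fetch the current lines and version of a doc from the document updater
DocumentUpdaterManager.getDocument("project-id", "doc-id", function(error, content, version) {
  if (error) { return console.error(error); }
  // write the content back, attributing the restore to a user
  DocumentUpdaterManager.setDocument("project-id", "doc-id", content, "user-id", function(error) {
    if (error) { return console.error(error); }
    console.log("doc restored, was at version", version);
  });
});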


@@ -1,46 +1,64 @@
ObjectId = require("mongojs").ObjectId
request = require("request")
async = require("async")
settings = require("settings-sharelatex")
port = settings.internal.trackchanges.port
logger = require "logger-sharelatex"
LockManager = require "./LockManager"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { ObjectId } = require("mongojs");
const request = require("request");
const async = require("async");
const settings = require("settings-sharelatex");
const { port } = settings.internal.trackchanges;
const logger = require("logger-sharelatex");
const LockManager = require("./LockManager");
module.exports =
check : (callback)->
project_id = ObjectId(settings.trackchanges.healthCheck.project_id)
url = "http://localhost:#{port}/project/#{project_id}"
logger.log project_id:project_id, "running health check"
jobs = [
(cb)->
request.get {url:"http://localhost:#{port}/check_lock", timeout:3000}, (err, res, body) ->
if err?
logger.err err:err, project_id:project_id, "error checking lock for health check"
cb(err)
else if res?.statusCode != 200
cb("status code not 200, it's #{res.statusCode}")
else
cb()
(cb)->
request.post {url:"#{url}/flush", timeout:10000}, (err, res, body) ->
if err?
logger.err err:err, project_id:project_id, "error flushing for health check"
cb(err)
else if res?.statusCode != 204
cb("status code not 204, it's #{res.statusCode}")
else
cb()
(cb)->
request.get {url:"#{url}/updates", timeout:10000}, (err, res, body)->
if err?
logger.err err:err, project_id:project_id, "error getting updates for health check"
cb(err)
else if res?.statusCode != 200
cb("status code not 200, it's #{res.statusCode}")
else
cb()
]
async.series jobs, callback
module.exports = {
check(callback){
const project_id = ObjectId(settings.trackchanges.healthCheck.project_id);
const url = `http://localhost:${port}/project/${project_id}`;
logger.log({project_id}, "running health check");
const jobs = [
cb=>
request.get({url:`http://localhost:${port}/check_lock`, timeout:3000}, function(err, res, body) {
if (err != null) {
logger.err({err, project_id}, "error checking lock for health check");
return cb(err);
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(`status code not 200, it's ${res.statusCode}`);
} else {
return cb();
}
})
,
cb=>
request.post({url:`${url}/flush`, timeout:10000}, function(err, res, body) {
if (err != null) {
logger.err({err, project_id}, "error flushing for health check");
return cb(err);
} else if ((res != null ? res.statusCode : undefined) !== 204) {
return cb(`status code not 204, it's ${res.statusCode}`);
} else {
return cb();
}
})
,
cb=>
request.get({url:`${url}/updates`, timeout:10000}, function(err, res, body){
if (err != null) {
logger.err({err, project_id}, "error getting updates for health check");
return cb(err);
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(`status code not 200, it's ${res.statusCode}`);
} else {
return cb();
}
})
checkLock: (callback) ->
LockManager.healthCheck callback
];
return async.series(jobs, callback);
},
checkLock(callback) {
return LockManager.healthCheck(callback);
}
};
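A sketch of invoking the health check above (assumed require path):

const HealthChecker = require("./app/js/HealthChecker");

// runs the three HTTP probes (check_lock, flush, updates) in series
HealthChecker.check(function(err) {
  if (err) { console.error("health check failed:", err); }
});

// verifies that the Redis lock can be acquired and released
HealthChecker.checkLock(function(err) {
  if (err) { console.error("lock check failed:", err); }
});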


@@ -1,137 +1,195 @@
UpdatesManager = require "./UpdatesManager"
DiffManager = require "./DiffManager"
PackManager = require "./PackManager"
RestoreManager = require "./RestoreManager"
logger = require "logger-sharelatex"
HealthChecker = require "./HealthChecker"
_ = require "underscore"
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let HttpController;
const UpdatesManager = require("./UpdatesManager");
const DiffManager = require("./DiffManager");
const PackManager = require("./PackManager");
const RestoreManager = require("./RestoreManager");
const logger = require("logger-sharelatex");
const HealthChecker = require("./HealthChecker");
const _ = require("underscore");
module.exports = HttpController =
flushDoc: (req, res, next = (error) ->) ->
doc_id = req.params.doc_id
project_id = req.params.project_id
logger.log project_id: project_id, doc_id: doc_id, "compressing doc history"
UpdatesManager.processUncompressedUpdatesWithLock project_id, doc_id, (error) ->
return next(error) if error?
res.send 204
module.exports = (HttpController = {
flushDoc(req, res, next) {
if (next == null) { next = function(error) {}; }
const { doc_id } = req.params;
const { project_id } = req.params;
logger.log({project_id, doc_id}, "compressing doc history");
return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
if (error != null) { return next(error); }
return res.send(204);
});
},
flushProject: (req, res, next = (error) ->) ->
project_id = req.params.project_id
logger.log project_id: project_id, "compressing project history"
UpdatesManager.processUncompressedUpdatesForProject project_id, (error) ->
return next(error) if error?
res.send 204
flushProject(req, res, next) {
if (next == null) { next = function(error) {}; }
const { project_id } = req.params;
logger.log({project_id}, "compressing project history");
return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
if (error != null) { return next(error); }
return res.send(204);
});
},
flushAll: (req, res, next = (error) ->) ->
# limit on projects to flush or -1 for all (default)
limit = if req.query.limit? then parseInt(req.query.limit, 10) else -1
logger.log {limit: limit}, "flushing all projects"
UpdatesManager.flushAll limit, (error, result) ->
return next(error) if error?
{failed, succeeded, all} = result
status = "#{succeeded.length} succeeded, #{failed.length} failed"
if limit == 0
res.status(200).send "#{status}\nwould flush:\n#{all.join('\n')}\n"
else if failed.length > 0
logger.log {failed: failed, succeeded: succeeded}, "error flushing projects"
res.status(500).send "#{status}\nfailed to flush:\n#{failed.join('\n')}\n"
else
res.status(200).send "#{status}\nflushed #{succeeded.length} projects of #{all.length}\n"
flushAll(req, res, next) {
// limit on projects to flush or -1 for all (default)
if (next == null) { next = function(error) {}; }
const limit = (req.query.limit != null) ? parseInt(req.query.limit, 10) : -1;
logger.log({limit}, "flushing all projects");
return UpdatesManager.flushAll(limit, function(error, result) {
if (error != null) { return next(error); }
const {failed, succeeded, all} = result;
const status = `${succeeded.length} succeeded, ${failed.length} failed`;
if (limit === 0) {
return res.status(200).send(`${status}\nwould flush:\n${all.join('\n')}\n`);
} else if (failed.length > 0) {
logger.log({failed, succeeded}, "error flushing projects");
return res.status(500).send(`${status}\nfailed to flush:\n${failed.join('\n')}\n`);
} else {
return res.status(200).send(`${status}\nflushed ${succeeded.length} projects of ${all.length}\n`);
}
});
},
checkDanglingUpdates: (req, res, next = (error) ->) ->
logger.log "checking dangling updates"
UpdatesManager.getDanglingUpdates (error, result) ->
return next(error) if error?
if result.length > 0
logger.log {dangling: result}, "found dangling updates"
res.status(500).send "dangling updates:\n#{result.join('\n')}\n"
else
res.status(200).send "no dangling updates found\n"
checkDanglingUpdates(req, res, next) {
if (next == null) { next = function(error) {}; }
logger.log("checking dangling updates");
return UpdatesManager.getDanglingUpdates(function(error, result) {
if (error != null) { return next(error); }
if (result.length > 0) {
logger.log({dangling: result}, "found dangling updates");
return res.status(500).send(`dangling updates:\n${result.join('\n')}\n`);
} else {
return res.status(200).send("no dangling updates found\n");
}
});
},
checkDoc: (req, res, next = (error) ->) ->
doc_id = req.params.doc_id
project_id = req.params.project_id
logger.log project_id: project_id, doc_id: doc_id, "checking doc history"
DiffManager.getDocumentBeforeVersion project_id, doc_id, 1, (error, document, rewoundUpdates) ->
return next(error) if error?
broken = []
for update in rewoundUpdates
for op in update.op when op.broken is true
broken.push op
if broken.length > 0
res.send broken
else
res.send 204
checkDoc(req, res, next) {
if (next == null) { next = function(error) {}; }
const { doc_id } = req.params;
const { project_id } = req.params;
logger.log({project_id, doc_id}, "checking doc history");
return DiffManager.getDocumentBeforeVersion(project_id, doc_id, 1, function(error, document, rewoundUpdates) {
if (error != null) { return next(error); }
const broken = [];
for (let update of Array.from(rewoundUpdates)) {
for (let op of Array.from(update.op)) {
if (op.broken === true) {
broken.push(op);
}
}
}
if (broken.length > 0) {
return res.send(broken);
} else {
return res.send(204);
}
});
},
getDiff: (req, res, next = (error) ->) ->
doc_id = req.params.doc_id
project_id = req.params.project_id
getDiff(req, res, next) {
let from, to;
if (next == null) { next = function(error) {}; }
const { doc_id } = req.params;
const { project_id } = req.params;
if req.query.from?
from = parseInt(req.query.from, 10)
else
from = null
if req.query.to?
to = parseInt(req.query.to, 10)
else
to = null
logger.log {project_id, doc_id, from, to}, "getting diff"
DiffManager.getDiff project_id, doc_id, from, to, (error, diff) ->
return next(error) if error?
res.json {diff: diff}
getUpdates: (req, res, next = (error) ->) ->
project_id = req.params.project_id
if req.query.before?
before = parseInt(req.query.before, 10)
if req.query.min_count?
min_count = parseInt(req.query.min_count, 10)
UpdatesManager.getSummarizedProjectUpdates project_id, before: before, min_count: min_count, (error, updates, nextBeforeTimestamp) ->
return next(error) if error?
res.json {
updates: updates
nextBeforeTimestamp: nextBeforeTimestamp
if (req.query.from != null) {
from = parseInt(req.query.from, 10);
} else {
from = null;
}
if (req.query.to != null) {
to = parseInt(req.query.to, 10);
} else {
to = null;
}
restore: (req, res, next = (error) ->) ->
{doc_id, project_id, version} = req.params
user_id = req.headers["x-user-id"]
version = parseInt(version, 10)
RestoreManager.restoreToBeforeVersion project_id, doc_id, version, user_id, (error) ->
return next(error) if error?
res.send 204
logger.log({project_id, doc_id, from, to}, "getting diff");
return DiffManager.getDiff(project_id, doc_id, from, to, function(error, diff) {
if (error != null) { return next(error); }
return res.json({diff});
});
},
pushDocHistory: (req, res, next = (error) ->) ->
project_id = req.params.project_id
doc_id = req.params.doc_id
logger.log {project_id, doc_id}, "pushing all finalised changes to s3"
PackManager.pushOldPacks project_id, doc_id, (error) ->
return next(error) if error?
res.send 204
getUpdates(req, res, next) {
let before, min_count;
if (next == null) { next = function(error) {}; }
const { project_id } = req.params;
pullDocHistory: (req, res, next = (error) ->) ->
project_id = req.params.project_id
doc_id = req.params.doc_id
logger.log {project_id, doc_id}, "pulling all packs from s3"
PackManager.pullOldPacks project_id, doc_id, (error) ->
return next(error) if error?
res.send 204
if (req.query.before != null) {
before = parseInt(req.query.before, 10);
}
if (req.query.min_count != null) {
min_count = parseInt(req.query.min_count, 10);
}
healthCheck: (req, res)->
HealthChecker.check (err)->
if err?
logger.err err:err, "error performing health check"
res.send 500
else
res.send 200
return UpdatesManager.getSummarizedProjectUpdates(project_id, {before, min_count}, function(error, updates, nextBeforeTimestamp) {
if (error != null) { return next(error); }
return res.json({
updates,
nextBeforeTimestamp
});
});
},
checkLock: (req, res)->
HealthChecker.checkLock (err) ->
if err?
logger.err err:err, "error performing lock check"
res.send 500
else
res.send 200
restore(req, res, next) {
if (next == null) { next = function(error) {}; }
let {doc_id, project_id, version} = req.params;
const user_id = req.headers["x-user-id"];
version = parseInt(version, 10);
return RestoreManager.restoreToBeforeVersion(project_id, doc_id, version, user_id, function(error) {
if (error != null) { return next(error); }
return res.send(204);
});
},
pushDocHistory(req, res, next) {
if (next == null) { next = function(error) {}; }
const { project_id } = req.params;
const { doc_id } = req.params;
logger.log({project_id, doc_id}, "pushing all finalised changes to s3");
return PackManager.pushOldPacks(project_id, doc_id, function(error) {
if (error != null) { return next(error); }
return res.send(204);
});
},
pullDocHistory(req, res, next) {
if (next == null) { next = function(error) {}; }
const { project_id } = req.params;
const { doc_id } = req.params;
logger.log({project_id, doc_id}, "pulling all packs from s3");
return PackManager.pullOldPacks(project_id, doc_id, function(error) {
if (error != null) { return next(error); }
return res.send(204);
});
},
healthCheck(req, res){
return HealthChecker.check(function(err){
if (err != null) {
logger.err({err}, "error performing health check");
return res.send(500);
} else {
return res.send(200);
}
});
},
checkLock(req, res){
return HealthChecker.checkLock(function(err) {
if (err != null) {
logger.err({err}, "error performing lock check");
return res.send(500);
} else {
return res.send(200);
}
});
}
});
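The router that mounts these handlers is not part of this diff; a hypothetical Express wiring, with route paths chosen only to match the req.params and req.query fields the handlers read:

const express = require("express");
const HttpController = require("./app/js/HttpController");   // assumed path

const app = express();
app.post("/project/:project_id/doc/:doc_id/flush", HttpController.flushDoc);
app.get("/project/:project_id/doc/:doc_id/diff", HttpController.getDiff);         // ?from=&to=
app.get("/project/:project_id/updates", HttpController.getUpdates);               // ?before=&min_count=
app.post("/project/:project_id/doc/:doc_id/version/:version/restore", HttpController.restore);
app.get("/health_check", HttpController.healthCheck);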


@@ -1,85 +1,119 @@
Settings = require "settings-sharelatex"
redis = require("redis-sharelatex")
rclient = redis.createClient(Settings.redis.lock)
os = require "os"
crypto = require "crypto"
logger = require "logger-sharelatex"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let LockManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.lock);
const os = require("os");
const crypto = require("crypto");
const logger = require("logger-sharelatex");
HOST = os.hostname()
PID = process.pid
RND = crypto.randomBytes(4).toString('hex')
COUNT = 0
const HOST = os.hostname();
const PID = process.pid;
const RND = crypto.randomBytes(4).toString('hex');
let COUNT = 0;
module.exports = LockManager =
LOCK_TEST_INTERVAL: 50 # 50ms between each test of the lock
MAX_LOCK_WAIT_TIME: 10000 # 10s maximum time to spend trying to get the lock
LOCK_TTL: 300 # seconds (allow 5 minutes for any operation to complete)
module.exports = (LockManager = {
LOCK_TEST_INTERVAL: 50, // 50ms between each test of the lock
MAX_LOCK_WAIT_TIME: 10000, // 10s maximum time to spend trying to get the lock
LOCK_TTL: 300, // seconds (allow 5 minutes for any operation to complete)
# Use a signed lock value as described in
# http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
# to prevent accidental unlocking by multiple processes
randomLock : () ->
time = Date.now()
return "locked:host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{time}:count=#{COUNT++}"
// Use a signed lock value as described in
// http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
// to prevent accidental unlocking by multiple processes
randomLock() {
const time = Date.now();
return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`;
},
unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end';
unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end',
tryLock : (key, callback = (err, gotLock) ->) ->
lockValue = LockManager.randomLock()
rclient.set key, lockValue, "EX", @LOCK_TTL, "NX", (err, gotLock)->
return callback(err) if err?
if gotLock == "OK"
callback err, true, lockValue
else
callback err, false
tryLock(key, callback) {
if (callback == null) { callback = function(err, gotLock) {}; }
const lockValue = LockManager.randomLock();
return rclient.set(key, lockValue, "EX", this.LOCK_TTL, "NX", function(err, gotLock){
if (err != null) { return callback(err); }
if (gotLock === "OK") {
return callback(err, true, lockValue);
} else {
return callback(err, false);
}
});
},
getLock: (key, callback = (error) ->) ->
startTime = Date.now()
do attempt = () ->
if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME
e = new Error("Timeout")
e.key = key
return callback(e)
getLock(key, callback) {
let attempt;
if (callback == null) { callback = function(error) {}; }
const startTime = Date.now();
return (attempt = function() {
if ((Date.now() - startTime) > LockManager.MAX_LOCK_WAIT_TIME) {
const e = new Error("Timeout");
e.key = key;
return callback(e);
}
LockManager.tryLock key, (error, gotLock, lockValue) ->
return callback(error) if error?
if gotLock
callback(null, lockValue)
else
setTimeout attempt, LockManager.LOCK_TEST_INTERVAL
return LockManager.tryLock(key, function(error, gotLock, lockValue) {
if (error != null) { return callback(error); }
if (gotLock) {
return callback(null, lockValue);
} else {
return setTimeout(attempt, LockManager.LOCK_TEST_INTERVAL);
}
});
})();
},
checkLock: (key, callback = (err, isFree) ->) ->
rclient.exists key, (err, exists) ->
return callback(err) if err?
exists = parseInt exists
if exists == 1
callback err, false
else
callback err, true
checkLock(key, callback) {
if (callback == null) { callback = function(err, isFree) {}; }
return rclient.exists(key, function(err, exists) {
if (err != null) { return callback(err); }
exists = parseInt(exists);
if (exists === 1) {
return callback(err, false);
} else {
return callback(err, true);
}
});
},
releaseLock: (key, lockValue, callback) ->
rclient.eval LockManager.unlockScript, 1, key, lockValue, (err, result) ->
if err?
return callback(err)
if result? and result isnt 1 # successful unlock should release exactly one key
logger.error {key:key, lockValue:lockValue, redis_err:err, redis_result:result}, "unlocking error"
return callback(new Error("tried to release timed out lock"))
callback(err,result)
releaseLock(key, lockValue, callback) {
return rclient.eval(LockManager.unlockScript, 1, key, lockValue, function(err, result) {
if (err != null) {
return callback(err);
}
if ((result != null) && (result !== 1)) { // successful unlock should release exactly one key
logger.error({key, lockValue, redis_err:err, redis_result:result}, "unlocking error");
return callback(new Error("tried to release timed out lock"));
}
return callback(err,result);
});
},
runWithLock: (key, runner, callback = ( (error) -> )) ->
LockManager.getLock key, (error, lockValue) ->
return callback(error) if error?
runner (error1) ->
LockManager.releaseLock key, lockValue, (error2) ->
error = error1 or error2
return callback(error) if error?
callback()
runWithLock(key, runner, callback) {
if (callback == null) { callback = function(error) {}; }
return LockManager.getLock(key, function(error, lockValue) {
if (error != null) { return callback(error); }
return runner(error1 =>
LockManager.releaseLock(key, lockValue, function(error2) {
error = error1 || error2;
if (error != null) { return callback(error); }
return callback();
})
);
});
},
healthCheck: (callback) ->
action = (releaseLock) ->
releaseLock()
LockManager.runWithLock "HistoryLock:HealthCheck:host=#{HOST}:pid=#{PID}:random=#{RND}", action, callback
healthCheck(callback) {
const action = releaseLock => releaseLock();
return LockManager.runWithLock(`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`, action, callback);
},
close: (callback) ->
rclient.quit()
rclient.once 'end', callback
close(callback) {
rclient.quit();
return rclient.once('end', callback);
}
});
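A sketch of runWithLock as used above (the key is a placeholder; the runner receives a single release callback, as in healthCheck):

const LockManager = require("./app/js/LockManager");

LockManager.runWithLock("HistoryLock:some-doc-id", function(releaseLock) {
  // ... critical section: do the locked work here ...
  releaseLock();    // call exactly once; pass an error to propagate it
}, function(error) {
  if (error) { return console.error("work or unlock failed:", error); }
  console.log("lock released");
});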


@@ -1,118 +1,141 @@
settings = require "settings-sharelatex"
logger = require "logger-sharelatex"
AWS = require 'aws-sdk'
S3S = require 's3-streams'
{db, ObjectId} = require "./mongojs"
JSONStream = require "JSONStream"
ReadlineStream = require "byline"
zlib = require "zlib"
Metrics = require "metrics-sharelatex"
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let MongoAWS;
const settings = require("settings-sharelatex");
const logger = require("logger-sharelatex");
const AWS = require('aws-sdk');
const S3S = require('s3-streams');
const {db, ObjectId} = require("./mongojs");
const JSONStream = require("JSONStream");
const ReadlineStream = require("byline");
const zlib = require("zlib");
const Metrics = require("metrics-sharelatex");
DAYS = 24 * 3600 * 1000 # one day in milliseconds
const DAYS = 24 * 3600 * 1000; // one day in milliseconds
createStream = (streamConstructor, project_id, doc_id, pack_id) ->
AWS_CONFIG =
accessKeyId: settings.trackchanges.s3.key
secretAccessKey: settings.trackchanges.s3.secret
endpoint: settings.trackchanges.s3.endpoint
const createStream = function(streamConstructor, project_id, doc_id, pack_id) {
const AWS_CONFIG = {
accessKeyId: settings.trackchanges.s3.key,
secretAccessKey: settings.trackchanges.s3.secret,
endpoint: settings.trackchanges.s3.endpoint,
s3ForcePathStyle: settings.trackchanges.s3.pathStyle
};
return streamConstructor new AWS.S3(AWS_CONFIG), {
return streamConstructor(new AWS.S3(AWS_CONFIG), {
"Bucket": settings.trackchanges.stores.doc_history,
"Key": project_id+"/changes-"+doc_id+"/pack-"+pack_id
}
});
};
module.exports = MongoAWS =
module.exports = (MongoAWS = {
archivePack: (project_id, doc_id, pack_id, _callback = (error) ->) ->
archivePack(project_id, doc_id, pack_id, _callback) {
callback = (args...) ->
_callback(args...)
_callback = () ->
if (_callback == null) { _callback = function(error) {}; }
const callback = function(...args) {
_callback(...Array.from(args || []));
return _callback = function() {};
};
query = {
_id: ObjectId(pack_id)
const query = {
_id: ObjectId(pack_id),
doc_id: ObjectId(doc_id)
};
if ((project_id == null)) { return callback(new Error("invalid project id")); }
if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
if ((pack_id == null)) { return callback(new Error("invalid pack id")); }
logger.log({project_id, doc_id, pack_id}, "uploading data to s3");
const upload = createStream(S3S.WriteStream, project_id, doc_id, pack_id);
return db.docHistory.findOne(query, function(err, result) {
if (err != null) { return callback(err); }
if ((result == null)) { return callback(new Error("cannot find pack to send to s3")); }
if (result.expiresAt != null) { return callback(new Error("refusing to send pack with TTL to s3")); }
const uncompressedData = JSON.stringify(result);
if (uncompressedData.indexOf("\u0000") !== -1) {
const error = new Error("null bytes found in upload");
logger.error({err: error, project_id, doc_id, pack_id}, error.message);
return callback(error);
}
return zlib.gzip(uncompressedData, function(err, buf) {
logger.log({project_id, doc_id, pack_id, origSize: uncompressedData.length, newSize: buf.length}, "compressed pack");
if (err != null) { return callback(err); }
upload.on('error', err => callback(err));
upload.on('finish', function() {
Metrics.inc("archive-pack");
logger.log({project_id, doc_id, pack_id}, "upload to s3 completed");
return callback(null);
});
upload.write(buf);
return upload.end();
});
});
},
return callback new Error("invalid project id") if not project_id?
return callback new Error("invalid doc id") if not doc_id?
return callback new Error("invalid pack id") if not pack_id?
readArchivedPack(project_id, doc_id, pack_id, _callback) {
if (_callback == null) { _callback = function(error, result) {}; }
const callback = function(...args) {
_callback(...Array.from(args || []));
return _callback = function() {};
};
logger.log {project_id, doc_id, pack_id}, "uploading data to s3"
if ((project_id == null)) { return callback(new Error("invalid project id")); }
if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
if ((pack_id == null)) { return callback(new Error("invalid pack id")); }
upload = createStream S3S.WriteStream, project_id, doc_id, pack_id
logger.log({project_id, doc_id, pack_id}, "downloading data from s3");
db.docHistory.findOne query, (err, result) ->
return callback(err) if err?
return callback new Error("cannot find pack to send to s3") if not result?
return callback new Error("refusing to send pack with TTL to s3") if result.expiresAt?
uncompressedData = JSON.stringify(result)
if uncompressedData.indexOf("\u0000") != -1
error = new Error("null bytes found in upload")
logger.error err: error, project_id: project_id, doc_id: doc_id, pack_id: pack_id, error.message
return callback(error)
zlib.gzip uncompressedData, (err, buf) ->
logger.log {project_id, doc_id, pack_id, origSize: uncompressedData.length, newSize: buf.length}, "compressed pack"
return callback(err) if err?
upload.on 'error', (err) ->
callback(err)
upload.on 'finish', () ->
Metrics.inc("archive-pack")
logger.log {project_id, doc_id, pack_id}, "upload to s3 completed"
callback(null)
upload.write buf
upload.end()
const download = createStream(S3S.ReadStream, project_id, doc_id, pack_id);
readArchivedPack: (project_id, doc_id, pack_id, _callback = (error, result) ->) ->
callback = (args...) ->
_callback(args...)
_callback = () ->
const inputStream = download
.on('open', obj => 1).on('error', err => callback(err));
return callback new Error("invalid project id") if not project_id?
return callback new Error("invalid doc id") if not doc_id?
return callback new Error("invalid pack id") if not pack_id?
const gunzip = zlib.createGunzip();
gunzip.setEncoding('utf8');
gunzip.on('error', function(err) {
logger.log({project_id, doc_id, pack_id, err}, "error uncompressing gzip stream");
return callback(err);
});
logger.log {project_id, doc_id, pack_id}, "downloading data from s3"
const outputStream = inputStream.pipe(gunzip);
const parts = [];
outputStream.on('error', err => callback(err));
outputStream.on('end', function() {
let object;
logger.log({project_id, doc_id, pack_id}, "download from s3 completed");
try {
object = JSON.parse(parts.join(''));
} catch (e) {
return callback(e);
}
object._id = ObjectId(object._id);
object.doc_id = ObjectId(object.doc_id);
object.project_id = ObjectId(object.project_id);
for (let op of Array.from(object.pack)) {
if (op._id != null) { op._id = ObjectId(op._id); }
}
return callback(null, object);
});
return outputStream.on('data', data => parts.push(data));
},
download = createStream S3S.ReadStream, project_id, doc_id, pack_id
inputStream = download
.on 'open', (obj) ->
return 1
.on 'error', (err) ->
callback(err)
gunzip = zlib.createGunzip()
gunzip.setEncoding('utf8')
gunzip.on 'error', (err) ->
logger.log {project_id, doc_id, pack_id, err}, "error uncompressing gzip stream"
callback(err)
outputStream = inputStream.pipe gunzip
parts = []
outputStream.on 'error', (err) ->
return callback(err)
outputStream.on 'end', () ->
logger.log {project_id, doc_id, pack_id}, "download from s3 completed"
try
object = JSON.parse parts.join('')
catch e
return callback(e)
object._id = ObjectId(object._id)
object.doc_id = ObjectId(object.doc_id)
object.project_id = ObjectId(object.project_id)
for op in object.pack
op._id = ObjectId(op._id) if op._id?
callback null, object
outputStream.on 'data', (data) ->
parts.push data
unArchivePack: (project_id, doc_id, pack_id, callback = (error) ->) ->
MongoAWS.readArchivedPack project_id, doc_id, pack_id, (err, object) ->
return callback(err) if err?
Metrics.inc("unarchive-pack")
# allow the object to expire, we can always retrieve it again
object.expiresAt = new Date(Date.now() + 7 * DAYS)
logger.log {project_id, doc_id, pack_id}, "inserting object from s3"
db.docHistory.insert object, callback
unArchivePack(project_id, doc_id, pack_id, callback) {
if (callback == null) { callback = function(error) {}; }
return MongoAWS.readArchivedPack(project_id, doc_id, pack_id, function(err, object) {
if (err != null) { return callback(err); }
Metrics.inc("unarchive-pack");
// allow the object to expire, we can always retrieve it again
object.expiresAt = new Date(Date.now() + (7 * DAYS));
logger.log({project_id, doc_id, pack_id}, "inserting object from s3");
return db.docHistory.insert(object, callback);
});
}
});
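A sketch of archiving and restoring a pack with the converted MongoAWS module (placeholder 24-hex ObjectId strings, assumed require path):

const MongoAWS = require("./app/js/MongoAWS");

const project_id = "000000000000000000000001";
const doc_id = "000000000000000000000002";
const pack_id = "000000000000000000000003";

// upload the finalised pack to S3, then pull it back into Mongo
// (unArchivePack re-inserts it with a 7-day expiresAt)
MongoAWS.archivePack(project_id, doc_id, pack_id, function(err) {
  if (err) { return console.error(err); }
  MongoAWS.unArchivePack(project_id, doc_id, pack_id, function(err) {
    if (err) { return console.error(err); }
    console.log("pack restored from s3");
  });
});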


@@ -1,104 +1,131 @@
{db, ObjectId} = require "./mongojs"
PackManager = require "./PackManager"
async = require "async"
_ = require "underscore"
metrics = require 'metrics-sharelatex'
logger = require 'logger-sharelatex'
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let MongoManager;
const {db, ObjectId} = require("./mongojs");
const PackManager = require("./PackManager");
const async = require("async");
const _ = require("underscore");
const metrics = require('metrics-sharelatex');
const logger = require('logger-sharelatex');
module.exports = MongoManager =
getLastCompressedUpdate: (doc_id, callback = (error, update) ->) ->
db.docHistory
.find(doc_id: ObjectId(doc_id.toString()), {pack: {$slice:-1}}) # only return the last entry in a pack
.sort( v: -1 )
module.exports = (MongoManager = {
getLastCompressedUpdate(doc_id, callback) {
if (callback == null) { callback = function(error, update) {}; }
return db.docHistory
.find({doc_id: ObjectId(doc_id.toString())}, {pack: {$slice:-1}}) // only return the last entry in a pack
.sort({ v: -1 })
.limit(1)
.toArray (error, compressedUpdates) ->
return callback(error) if error?
callback null, compressedUpdates[0] or null
.toArray(function(error, compressedUpdates) {
if (error != null) { return callback(error); }
return callback(null, compressedUpdates[0] || null);
});
},
peekLastCompressedUpdate: (doc_id, callback = (error, update, version) ->) ->
# under normal use we pass back the last update as
# callback(null,update,version).
#
# when we have an existing last update but want to force a new one
# to start, we pass it back as callback(null,null,version), just
# giving the version so we can check consistency.
MongoManager.getLastCompressedUpdate doc_id, (error, update) ->
return callback(error) if error?
if update?
if update.broken # marked as broken so we will force a new op
return callback null, null
else if update.pack?
if update.finalised # no more ops can be appended
return callback null, null, update.pack[0]?.v
else
return callback null, update, update.pack[0]?.v
else
return callback null, update, update.v
else
PackManager.getLastPackFromIndex doc_id, (error, pack) ->
return callback(error) if error?
return callback(null, null, pack.v_end) if pack?.inS3? and pack?.v_end?
callback null, null
peekLastCompressedUpdate(doc_id, callback) {
// under normal use we pass back the last update as
// callback(null,update,version).
//
// when we have an existing last update but want to force a new one
// to start, we pass it back as callback(null,null,version), just
// giving the version so we can check consistency.
if (callback == null) { callback = function(error, update, version) {}; }
return MongoManager.getLastCompressedUpdate(doc_id, function(error, update) {
if (error != null) { return callback(error); }
if (update != null) {
if (update.broken) { // marked as broken so we will force a new op
return callback(null, null);
} else if (update.pack != null) {
if (update.finalised) { // no more ops can be appended
return callback(null, null, update.pack[0] != null ? update.pack[0].v : undefined);
} else {
return callback(null, update, update.pack[0] != null ? update.pack[0].v : undefined);
}
} else {
return callback(null, update, update.v);
}
} else {
return PackManager.getLastPackFromIndex(doc_id, function(error, pack) {
if (error != null) { return callback(error); }
if (((pack != null ? pack.inS3 : undefined) != null) && ((pack != null ? pack.v_end : undefined) != null)) { return callback(null, null, pack.v_end); }
return callback(null, null);
});
}
});
},
backportProjectId: (project_id, doc_id, callback = (error) ->) ->
db.docHistory.update {
doc_id: ObjectId(doc_id.toString())
backportProjectId(project_id, doc_id, callback) {
if (callback == null) { callback = function(error) {}; }
return db.docHistory.update({
doc_id: ObjectId(doc_id.toString()),
project_id: { $exists: false }
}, {
$set: { project_id: ObjectId(project_id.toString()) }
}, {
multi: true
}, callback
}, callback);
},
getProjectMetaData: (project_id, callback = (error, metadata) ->) ->
db.projectHistoryMetaData.find {
getProjectMetaData(project_id, callback) {
if (callback == null) { callback = function(error, metadata) {}; }
return db.projectHistoryMetaData.find({
project_id: ObjectId(project_id.toString())
}, (error, results) ->
return callback(error) if error?
callback null, results[0]
}, function(error, results) {
if (error != null) { return callback(error); }
return callback(null, results[0]);
});
},
setProjectMetaData: (project_id, metadata, callback = (error) ->) ->
db.projectHistoryMetaData.update {
setProjectMetaData(project_id, metadata, callback) {
if (callback == null) { callback = function(error) {}; }
return db.projectHistoryMetaData.update({
project_id: ObjectId(project_id)
}, {
$set: metadata
}, {
upsert: true
}, callback
}, callback);
},
upgradeHistory: (project_id, callback = (error) ->) ->
# preserve the project's existing history
db.docHistory.update {
project_id: ObjectId(project_id)
temporary: true
upgradeHistory(project_id, callback) {
// preserve the project's existing history
if (callback == null) { callback = function(error) {}; }
return db.docHistory.update({
project_id: ObjectId(project_id),
temporary: true,
expiresAt: {$exists: true}
}, {
$set: {temporary: false}
$set: {temporary: false},
$unset: {expiresAt: ""}
}, {
multi: true
}, callback
}, callback);
},
ensureIndices: () ->
# For finding all updates that go into a diff for a doc
db.docHistory.ensureIndex { doc_id: 1, v: 1 }, { background: true }
# For finding all updates that affect a project
db.docHistory.ensureIndex { project_id: 1, "meta.end_ts": 1 }, { background: true }
# For finding updates that don't yet have a project_id and need it inserting
db.docHistory.ensureIndex { doc_id: 1, project_id: 1 }, { background: true }
# For finding project meta-data
db.projectHistoryMetaData.ensureIndex { project_id: 1 }, { background: true }
# TTL index for auto deleting week old temporary ops
db.docHistory.ensureIndex { expiresAt: 1 }, { expireAfterSeconds: 0, background: true }
# For finding packs to be checked for archiving
db.docHistory.ensureIndex { last_checked: 1 }, { background: true }
# For finding archived packs
db.docHistoryIndex.ensureIndex { project_id: 1 }, { background: true }
ensureIndices() {
// For finding all updates that go into a diff for a doc
db.docHistory.ensureIndex({ doc_id: 1, v: 1 }, { background: true });
// For finding all updates that affect a project
db.docHistory.ensureIndex({ project_id: 1, "meta.end_ts": 1 }, { background: true });
// For finding updates that don't yet have a project_id and need it inserting
db.docHistory.ensureIndex({ doc_id: 1, project_id: 1 }, { background: true });
// For finding project meta-data
db.projectHistoryMetaData.ensureIndex({ project_id: 1 }, { background: true });
// TTL index for auto deleting week old temporary ops
db.docHistory.ensureIndex({ expiresAt: 1 }, { expireAfterSeconds: 0, background: true });
// For finding packs to be checked for archiving
db.docHistory.ensureIndex({ last_checked: 1 }, { background: true });
// For finding archived packs
return db.docHistoryIndex.ensureIndex({ project_id: 1 }, { background: true });
}
});
[
'getLastCompressedUpdate',
'getProjectMetaData',
'setProjectMetaData'
].map (method) ->
metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger)
].map(method => metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger));
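A sketch of how a caller might interpret the three callback shapes that peekLastCompressedUpdate documents above (the helper name is illustrative; doc_id is assumed to come from the caller):

const MongoManager = require("./MongoManager");

function describeLastCompressedUpdate(doc_id, callback) {
  return MongoManager.peekLastCompressedUpdate(doc_id, function(error, update, version) {
    if (error != null) { return callback(error); }
    if (update != null) {
      // normal case: an appendable update plus its version
      return callback(null, `can extend existing update at v${version}`);
    } else if (version != null) {
      // finalised or archived-in-S3 pack: no update to extend,
      // but the version is still available for consistency checks
      return callback(null, `must start a new update after v${version}`);
    } else {
      // nothing stored yet for this doc, or the last update was marked broken
      return callback(null, "no compressed history to extend");
    }
  });
}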

File diff suppressed because it is too large

View file

@ -1,139 +1,183 @@
Settings = require "settings-sharelatex"
async = require "async"
_ = require "underscore"
{db, ObjectId, BSON} = require "./mongojs"
fs = require "fs"
Metrics = require "metrics-sharelatex"
Metrics.initialize("track-changes")
logger = require "logger-sharelatex"
logger.initialize("track-changes-packworker")
if Settings.sentry?.dsn?
logger.initializeErrorReporting(Settings.sentry.dsn)
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let LIMIT, pending;
let project_id, doc_id;
const Settings = require("settings-sharelatex");
const async = require("async");
const _ = require("underscore");
const {db, ObjectId, BSON} = require("./mongojs");
const fs = require("fs");
const Metrics = require("metrics-sharelatex");
Metrics.initialize("track-changes");
const logger = require("logger-sharelatex");
logger.initialize("track-changes-packworker");
if ((Settings.sentry != null ? Settings.sentry.dsn : undefined) != null) {
logger.initializeErrorReporting(Settings.sentry.dsn);
}
DAYS = 24 * 3600 * 1000
const DAYS = 24 * 3600 * 1000;
LockManager = require "./LockManager"
PackManager = require "./PackManager"
const LockManager = require("./LockManager");
const PackManager = require("./PackManager");
# this worker script is forked by the main process to look for
# document histories which can be archived
// this worker script is forked by the main process to look for
// document histories which can be archived
source = process.argv[2]
DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000
TIMEOUT = Number(process.argv[4]) || 30*60*1000
COUNT = 0 # number processed
TOTAL = 0 # total number to process
const source = process.argv[2];
const DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000;
const TIMEOUT = Number(process.argv[4]) || (30*60*1000);
let COUNT = 0; // number processed
let TOTAL = 0; // total number to process
if !source.match(/^[0-9]+$/)
file = fs.readFileSync source
result = for line in file.toString().split('\n')
[project_id, doc_id] = line.split(' ')
{doc_id, project_id}
pending = _.filter result, (row) -> row?.doc_id?.match(/^[a-f0-9]{24}$/)
else
LIMIT = Number(process.argv[2]) || 1000
if (!source.match(/^[0-9]+$/)) {
const file = fs.readFileSync(source);
const result = (() => {
const result1 = [];
for (let line of Array.from(file.toString().split('\n'))) {
[project_id, doc_id] = Array.from(line.split(' '));
result1.push({doc_id, project_id});
}
return result1;
})();
pending = _.filter(result, row => __guard__(row != null ? row.doc_id : undefined, x => x.match(/^[a-f0-9]{24}$/)));
} else {
LIMIT = Number(process.argv[2]) || 1000;
}
shutDownRequested = false
shutDownTimer = setTimeout () ->
logger.log "pack timed out, requesting shutdown"
# start the shutdown on the next pack
shutDownRequested = true
# do a hard shutdown after a further 5 minutes
hardTimeout = setTimeout () ->
logger.error "HARD TIMEOUT in pack archive worker"
process.exit()
, 5*60*1000
hardTimeout.unref()
, TIMEOUT
let shutDownRequested = false;
const shutDownTimer = setTimeout(function() {
logger.log("pack timed out, requesting shutdown");
// start the shutdown on the next pack
shutDownRequested = true;
// do a hard shutdown after a further 5 minutes
const hardTimeout = setTimeout(function() {
logger.error("HARD TIMEOUT in pack archive worker");
return process.exit();
}
, 5*60*1000);
return hardTimeout.unref();
}
, TIMEOUT);
logger.log "checking for updates, limit=#{LIMIT}, delay=#{DOCUMENT_PACK_DELAY}, timeout=#{TIMEOUT}"
logger.log(`checking for updates, limit=${LIMIT}, delay=${DOCUMENT_PACK_DELAY}, timeout=${TIMEOUT}`);
# workaround for https://github.com/mafintosh/mongojs/issues/224
db.close = (callback) ->
this._getServer (err, server) ->
return callback(err) if err?
server = if server.destroy? then server else server.topology
server.destroy(true, true)
callback()
// workaround for https://github.com/mafintosh/mongojs/issues/224
db.close = function(callback) {
return this._getServer(function(err, server) {
if (err != null) { return callback(err); }
server = (server.destroy != null) ? server : server.topology;
server.destroy(true, true);
return callback();
});
};
finish = () ->
if shutDownTimer?
logger.log 'cancelling timeout'
clearTimeout shutDownTimer
logger.log 'closing db'
db.close () ->
logger.log 'closing LockManager Redis Connection'
LockManager.close () ->
logger.log {processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker'
hardTimeout = setTimeout () ->
logger.error 'hard exit from pack archive worker'
process.exit(1)
, 5*1000
hardTimeout.unref()
const finish = function() {
if (shutDownTimer != null) {
logger.log('cancelling timeout');
clearTimeout(shutDownTimer);
}
logger.log('closing db');
return db.close(function() {
logger.log('closing LockManager Redis Connection');
return LockManager.close(function() {
logger.log({processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker');
const hardTimeout = setTimeout(function() {
logger.error('hard exit from pack archive worker');
return process.exit(1);
}
, 5*1000);
return hardTimeout.unref();
});
});
};
process.on 'exit', (code) ->
logger.log {code}, 'pack archive worker exited'
process.on('exit', code => logger.log({code}, 'pack archive worker exited'));
processUpdates = (pending) ->
async.eachSeries pending, (result, callback) ->
{_id, project_id, doc_id} = result
COUNT++
logger.log {project_id, doc_id}, "processing #{COUNT}/#{TOTAL}"
if not project_id? or not doc_id?
logger.log {project_id, doc_id}, "skipping pack, missing project/doc id"
return callback()
handler = (err, result) ->
if err? and err.code is "InternalError" and err.retryable
logger.warn {err, result}, "ignoring S3 error in pack archive worker"
# Ignore any s3 errors due to random problems
err = null
if err?
logger.error {err, result}, "error in pack archive worker"
return callback(err)
if shutDownRequested
logger.warn "shutting down pack archive worker"
return callback(new Error("shutdown"))
setTimeout () ->
callback(err, result)
, DOCUMENT_PACK_DELAY
if not _id?
PackManager.pushOldPacks project_id, doc_id, handler
else
PackManager.processOldPack project_id, doc_id, _id, handler
, (err, results) ->
if err? and err.message != "shutdown"
logger.error {err}, 'error in pack archive worker processUpdates'
finish()
const processUpdates = pending =>
async.eachSeries(pending, function(result, callback) {
let _id;
({_id, project_id, doc_id} = result);
COUNT++;
logger.log({project_id, doc_id}, `processing ${COUNT}/${TOTAL}`);
if ((project_id == null) || (doc_id == null)) {
logger.log({project_id, doc_id}, "skipping pack, missing project/doc id");
return callback();
}
const handler = function(err, result) {
if ((err != null) && (err.code === "InternalError") && err.retryable) {
logger.warn({err, result}, "ignoring S3 error in pack archive worker");
// Ignore any s3 errors due to random problems
err = null;
}
if (err != null) {
logger.error({err, result}, "error in pack archive worker");
return callback(err);
}
if (shutDownRequested) {
logger.warn("shutting down pack archive worker");
return callback(new Error("shutdown"));
}
return setTimeout(() => callback(err, result)
, DOCUMENT_PACK_DELAY);
};
if ((_id == null)) {
return PackManager.pushOldPacks(project_id, doc_id, handler);
} else {
return PackManager.processOldPack(project_id, doc_id, _id, handler);
}
}
, function(err, results) {
if ((err != null) && (err.message !== "shutdown")) {
logger.error({err}, 'error in pack archive worker processUpdates');
}
return finish();
})
;
# find the packs which can be archived
// find the packs which can be archived
ObjectIdFromDate = (date) ->
id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000";
return ObjectId(id)
const ObjectIdFromDate = function(date) {
const id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000";
return ObjectId(id);
};
# new approach, two passes
# find packs to be marked as finalised:true, those which have a newer pack present
# then only consider finalised:true packs for archiving
// new approach, two passes
// find packs to be marked as finalised:true, those which have a newer pack present
// then only consider finalised:true packs for archiving
if pending?
logger.log "got #{pending.length} entries from #{source}"
processUpdates pending
else
oneWeekAgo = new Date(Date.now() - 7 * DAYS)
if (pending != null) {
logger.log(`got ${pending.length} entries from ${source}`);
processUpdates(pending);
} else {
const oneWeekAgo = new Date(Date.now() - (7 * DAYS));
db.docHistory.find({
expiresAt: {$exists: false}
project_id: {$exists: true}
v_end: {$exists: true}
_id: {$lt: ObjectIdFromDate(oneWeekAgo)}
expiresAt: {$exists: false},
project_id: {$exists: true},
v_end: {$exists: true},
_id: {$lt: ObjectIdFromDate(oneWeekAgo)},
last_checked: {$lt: oneWeekAgo}
}, {_id:1, doc_id:1, project_id:1}).sort({
last_checked:1
}).limit LIMIT, (err, results) ->
if err?
logger.log {err}, 'error checking for updates'
finish()
return
pending = _.uniq results, false, (result) -> result.doc_id.toString()
TOTAL = pending.length
logger.log "found #{TOTAL} documents to archive"
processUpdates pending
}).limit(LIMIT, function(err, results) {
if (err != null) {
logger.log({err}, 'error checking for updates');
finish();
return;
}
pending = _.uniq(results, false, result => result.doc_id.toString());
TOTAL = pending.length;
logger.log(`found ${TOTAL} documents to archive`);
return processUpdates(pending);
});
}
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
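The worker above is driven entirely by process.argv, so the main process only needs to fork it with the right arguments. A sketch, assuming a hypothetical script path (the real filename is not shown in this diff):

const { fork } = require("child_process");

// argv[2]: numeric limit (or a file of "project_id doc_id" lines),
// argv[3]: per-document delay in ms, argv[4]: overall timeout in ms.
const worker = fork("./app/js/PackWorker.js", ["1000", "1000", String(30 * 60 * 1000)]);
worker.on("exit", code => console.log("pack archive worker exited with code", code));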

View file

@ -1,62 +1,84 @@
Heap = require "heap"
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let ProjectIterator;
const Heap = require("heap");
module.exports = ProjectIterator =
module.exports = (ProjectIterator =
class ProjectIterator
constructor: (packs, @before, @getPackByIdFn) ->
byEndTs = (a,b) -> (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex)
@packs = packs.slice().sort byEndTs
@queue = new Heap(byEndTs)
(ProjectIterator = class ProjectIterator {
constructor(packs, before, getPackByIdFn) {
this.before = before;
this.getPackByIdFn = getPackByIdFn;
const byEndTs = (a,b) => (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex);
this.packs = packs.slice().sort(byEndTs);
this.queue = new Heap(byEndTs);
}
next: (callback) ->
# what's up next
#console.log ">>> top item", iterator.packs[0]
iterator = this
before = @before
queue = iterator.queue
opsToReturn = []
nextPack = iterator.packs[0]
lowWaterMark = nextPack?.meta.end_ts || 0
nextItem = queue.peek()
next(callback) {
// what's up next
//console.log ">>> top item", iterator.packs[0]
const iterator = this;
const { before } = this;
const { queue } = iterator;
const opsToReturn = [];
let nextPack = iterator.packs[0];
let lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
let nextItem = queue.peek();
#console.log "queue empty?", queue.empty()
#console.log "nextItem", nextItem
#console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts
#console.log "lowWaterMark", lowWaterMark
//console.log "queue empty?", queue.empty()
//console.log "nextItem", nextItem
//console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts
//console.log "lowWaterMark", lowWaterMark
while before? and nextPack?.meta.start_ts > before
# discard pack that is outside range
iterator.packs.shift()
nextPack = iterator.packs[0]
lowWaterMark = nextPack?.meta.end_ts || 0
while ((before != null) && ((nextPack != null ? nextPack.meta.start_ts : undefined) > before)) {
// discard pack that is outside range
iterator.packs.shift();
nextPack = iterator.packs[0];
lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
}
if (queue.empty() or nextItem?.meta.end_ts <= lowWaterMark) and nextPack?
# retrieve the next pack and populate the queue
return @getPackByIdFn nextPack.project_id, nextPack.doc_id, nextPack._id, (err, pack) ->
return callback(err) if err?
iterator.packs.shift() # have now retrieved this pack, remove it
#console.log "got pack", pack
for op in pack.pack when (not before? or op.meta.end_ts < before)
#console.log "adding op", op
op.doc_id = nextPack.doc_id
op.project_id = nextPack.project_id
queue.push op
# now try again
return iterator.next(callback)
if ((queue.empty() || ((nextItem != null ? nextItem.meta.end_ts : undefined) <= lowWaterMark)) && (nextPack != null)) {
// retrieve the next pack and populate the queue
return this.getPackByIdFn(nextPack.project_id, nextPack.doc_id, nextPack._id, function(err, pack) {
if (err != null) { return callback(err); }
iterator.packs.shift(); // have now retrieved this pack, remove it
//console.log "got pack", pack
for (let op of Array.from(pack.pack)) {
//console.log "adding op", op
if ((before == null) || (op.meta.end_ts < before)) {
op.doc_id = nextPack.doc_id;
op.project_id = nextPack.project_id;
queue.push(op);
}
}
// now try again
return iterator.next(callback);
});
}
#console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark
while nextItem? and (nextItem?.meta.end_ts > lowWaterMark)
opsToReturn.push nextItem
queue.pop()
nextItem = queue.peek()
//console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark
while ((nextItem != null) && ((nextItem != null ? nextItem.meta.end_ts : undefined) > lowWaterMark)) {
opsToReturn.push(nextItem);
queue.pop();
nextItem = queue.peek();
}
#console.log "queue empty?", queue.empty()
#console.log "nextPack", nextPack?
//console.log "queue empty?", queue.empty()
//console.log "nextPack", nextPack?
if queue.empty() and not nextPack? # got everything
iterator._done = true
if (queue.empty() && (nextPack == null)) { // got everything
iterator._done = true;
}
callback(null, opsToReturn)
return callback(null, opsToReturn);
}
done: () ->
return @_done
done() {
return this._done;
}
}));
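A usage sketch for the iterator above: keep calling next() until done() reports that both the pack list and the queue are drained. collectUpdates and fetchPackById are illustrative names; fetchPackById must follow the getPackByIdFn(project_id, doc_id, pack_id, callback) signature used by the class:

const ProjectIterator = require("./ProjectIterator");

function collectUpdates(packs, before, fetchPackById, callback) {
  const iterator = new ProjectIterator(packs, before, fetchPackById);
  const ops = [];
  const step = () =>
    iterator.next(function(err, batch) {
      if (err != null) { return callback(err); }
      ops.push(...batch);
      if (iterator.done()) { return callback(null, ops); }
      return step(); // fetch the next batch, newest first
    });
  return step();
}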

View file

@ -1,80 +1,121 @@
Settings = require "settings-sharelatex"
redis = require("redis-sharelatex")
rclient = redis.createClient(Settings.redis.history)
Keys = Settings.redis.history.key_schema
async = require "async"
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RedisManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.history);
const Keys = Settings.redis.history.key_schema;
const async = require("async");
module.exports = RedisManager =
module.exports = (RedisManager = {
getOldestDocUpdates: (doc_id, batchSize, callback = (error, jsonUpdates) ->) ->
key = Keys.uncompressedHistoryOps({doc_id})
rclient.lrange key, 0, batchSize - 1, callback
getOldestDocUpdates(doc_id, batchSize, callback) {
if (callback == null) { callback = function(error, jsonUpdates) {}; }
const key = Keys.uncompressedHistoryOps({doc_id});
return rclient.lrange(key, 0, batchSize - 1, callback);
},
expandDocUpdates: (jsonUpdates, callback = (error, rawUpdates) ->) ->
try
rawUpdates = ( JSON.parse(update) for update in jsonUpdates or [] )
catch e
return callback(e)
callback null, rawUpdates
expandDocUpdates(jsonUpdates, callback) {
let rawUpdates;
if (callback == null) { callback = function(error, rawUpdates) {}; }
try {
rawUpdates = ( Array.from(jsonUpdates || []).map((update) => JSON.parse(update)) );
} catch (e) {
return callback(e);
}
return callback(null, rawUpdates);
},
deleteAppliedDocUpdates: (project_id, doc_id, docUpdates, callback = (error) ->) ->
multi = rclient.multi()
# Delete all the updates which have been applied (exact match)
for update in docUpdates or []
multi.lrem Keys.uncompressedHistoryOps({doc_id}), 1, update
multi.exec (error, results) ->
return callback(error) if error?
# It's ok to delete the doc_id from the set here. Even though the list
# of updates may not be empty, we will continue to process it until it is.
rclient.srem Keys.docsWithHistoryOps({project_id}), doc_id, (error) ->
return callback(error) if error?
callback null
deleteAppliedDocUpdates(project_id, doc_id, docUpdates, callback) {
if (callback == null) { callback = function(error) {}; }
const multi = rclient.multi();
// Delete all the updates which have been applied (exact match)
for (let update of Array.from(docUpdates || [])) {
multi.lrem(Keys.uncompressedHistoryOps({doc_id}), 1, update);
}
return multi.exec(function(error, results) {
if (error != null) { return callback(error); }
// It's ok to delete the doc_id from the set here. Even though the list
// of updates may not be empty, we will continue to process it until it is.
return rclient.srem(Keys.docsWithHistoryOps({project_id}), doc_id, function(error) {
if (error != null) { return callback(error); }
return callback(null);
});
});
},
getDocIdsWithHistoryOps: (project_id, callback = (error, doc_ids) ->) ->
rclient.smembers Keys.docsWithHistoryOps({project_id}), callback
getDocIdsWithHistoryOps(project_id, callback) {
if (callback == null) { callback = function(error, doc_ids) {}; }
return rclient.smembers(Keys.docsWithHistoryOps({project_id}), callback);
},
# iterate over keys asynchronously using redis scan (non-blocking)
# handle all the cluster nodes or single redis server
_getKeys: (pattern, callback) ->
nodes = rclient.nodes?('master') || [ rclient ];
doKeyLookupForNode = (node, cb) ->
RedisManager._getKeysFromNode node, pattern, cb
async.concatSeries nodes, doKeyLookupForNode, callback
// iterate over keys asynchronously using redis scan (non-blocking)
// handle all the cluster nodes or single redis server
_getKeys(pattern, callback) {
const nodes = (typeof rclient.nodes === 'function' ? rclient.nodes('master') : undefined) || [ rclient ];
const doKeyLookupForNode = (node, cb) => RedisManager._getKeysFromNode(node, pattern, cb);
return async.concatSeries(nodes, doKeyLookupForNode, callback);
},
_getKeysFromNode: (node, pattern, callback) ->
cursor = 0 # redis iterator
keySet = {} # use hash to avoid duplicate results
# scan over all keys looking for pattern
doIteration = (cb) ->
node.scan cursor, "MATCH", pattern, "COUNT", 1000, (error, reply) ->
return callback(error) if error?
[cursor, keys] = reply
for key in keys
keySet[key] = true
if cursor == '0' # note redis returns string result not numeric
return callback(null, Object.keys(keySet))
else
doIteration()
doIteration()
_getKeysFromNode(node, pattern, callback) {
let cursor = 0; // redis iterator
const keySet = {}; // use hash to avoid duplicate results
// scan over all keys looking for pattern
var doIteration = cb =>
node.scan(cursor, "MATCH", pattern, "COUNT", 1000, function(error, reply) {
let keys;
if (error != null) { return callback(error); }
[cursor, keys] = Array.from(reply);
for (let key of Array.from(keys)) {
keySet[key] = true;
}
if (cursor === '0') { // note redis returns string result not numeric
return callback(null, Object.keys(keySet));
} else {
return doIteration();
}
})
;
return doIteration();
},
# extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
# or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
_extractIds: (keyList) ->
ids = for key in keyList
m = key.match(/:\{?([0-9a-f]{24})\}?/) # extract object id
m[1]
return ids
// extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
// or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
_extractIds(keyList) {
const ids = (() => {
const result = [];
for (let key of Array.from(keyList)) {
const m = key.match(/:\{?([0-9a-f]{24})\}?/); // extract object id
result.push(m[1]);
}
return result;
})();
return ids;
},
getProjectIdsWithHistoryOps: (callback = (error, project_ids) ->) ->
RedisManager._getKeys Keys.docsWithHistoryOps({project_id:"*"}), (error, project_keys) ->
return callback(error) if error?
project_ids = RedisManager._extractIds project_keys
callback(error, project_ids)
getProjectIdsWithHistoryOps(callback) {
if (callback == null) { callback = function(error, project_ids) {}; }
return RedisManager._getKeys(Keys.docsWithHistoryOps({project_id:"*"}), function(error, project_keys) {
if (error != null) { return callback(error); }
const project_ids = RedisManager._extractIds(project_keys);
return callback(error, project_ids);
});
},
getAllDocIdsWithHistoryOps: (callback = (error, doc_ids) ->) ->
# return all the docids, to find dangling history entries after
# everything is flushed.
RedisManager._getKeys Keys.uncompressedHistoryOps({doc_id:"*"}), (error, doc_keys) ->
return callback(error) if error?
doc_ids = RedisManager._extractIds doc_keys
callback(error, doc_ids)
getAllDocIdsWithHistoryOps(callback) {
// return all the docids, to find dangling history entries after
// everything is flushed.
if (callback == null) { callback = function(error, doc_ids) {}; }
return RedisManager._getKeys(Keys.uncompressedHistoryOps({doc_id:"*"}), function(error, doc_keys) {
if (error != null) { return callback(error); }
const doc_ids = RedisManager._extractIds(doc_keys);
return callback(error, doc_ids);
});
}
});
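A small illustration of the two key formats _extractIds handles above; the id is the one from the comment, given once in plain form and once in cluster (hash-tagged) form:

const RedisManager = require("./RedisManager");

const ids = RedisManager._extractIds([
  "DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b",
  "DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b}"
]);
// ids -> ["57fd0b1f53a8396d22b2c24b", "57fd0b1f53a8396d22b2c24b"]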

View file

@ -1,12 +1,24 @@
DocumentUpdaterManager = require "./DocumentUpdaterManager"
DiffManager = require "./DiffManager"
logger = require "logger-sharelatex"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RestoreManager;
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffManager = require("./DiffManager");
const logger = require("logger-sharelatex");
module.exports = RestoreManager =
restoreToBeforeVersion: (project_id, doc_id, version, user_id, callback = (error) ->) ->
logger.log project_id: project_id, doc_id: doc_id, version: version, user_id: user_id, "restoring document"
DiffManager.getDocumentBeforeVersion project_id, doc_id, version, (error, content) ->
return callback(error) if error?
DocumentUpdaterManager.setDocument project_id, doc_id, content, user_id, (error) ->
return callback(error) if error?
callback()
module.exports = (RestoreManager = {
restoreToBeforeVersion(project_id, doc_id, version, user_id, callback) {
if (callback == null) { callback = function(error) {}; }
logger.log({project_id, doc_id, version, user_id}, "restoring document");
return DiffManager.getDocumentBeforeVersion(project_id, doc_id, version, function(error, content) {
if (error != null) { return callback(error); }
return DocumentUpdaterManager.setDocument(project_id, doc_id, content, user_id, function(error) {
if (error != null) { return callback(error); }
return callback();
});
});
}
});
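A brief sketch of calling the restore above; all four arguments are assumed to come from the HTTP layer, and the wrapper name is illustrative:

const RestoreManager = require("./RestoreManager");

function restore(project_id, doc_id, version, user_id, callback) {
  // rewinds the doc to its content just before `version`, then writes that
  // content back through the document updater as an edit by user_id
  return RestoreManager.restoreToBeforeVersion(project_id, doc_id, version, user_id, callback);
}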

View file

@ -1,218 +1,278 @@
strInject = (s1, pos, s2) -> s1[...pos] + s2 + s1[pos..]
strRemove = (s1, pos, length) -> s1[...pos] + s1[(pos + length)..]
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let oneMinute, twoMegabytes, UpdateCompressor;
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos);
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice((pos + length));
diff_match_patch = require("../lib/diff_match_patch").diff_match_patch
dmp = new diff_match_patch()
const { diff_match_patch } = require("../lib/diff_match_patch");
const dmp = new diff_match_patch();
module.exports = UpdateCompressor =
NOOP: "noop"
module.exports = (UpdateCompressor = {
NOOP: "noop",
# Updates come from the doc updater in format
# {
# op: [ { ... op1 ... }, { ... op2 ... } ]
# meta: { ts: ..., user_id: ... }
# }
# but it's easier to work with one op per update, so convert these updates to
# our compressed format
# [{
# op: op1
# meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }, {
# op: op2
# meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }]
convertToSingleOpUpdates: (updates) ->
splitUpdates = []
for update in updates
# Reject any non-insert or delete ops, i.e. comments
ops = update.op.filter (o) -> o.i? or o.d?
if ops.length == 0
splitUpdates.push
op: UpdateCompressor.NOOP
meta:
start_ts: update.meta.start_ts or update.meta.ts
end_ts: update.meta.end_ts or update.meta.ts
// Updates come from the doc updater in format
// {
// op: [ { ... op1 ... }, { ... op2 ... } ]
// meta: { ts: ..., user_id: ... }
// }
// but it's easier to work with one op per update, so convert these updates to
// our compressed format
// [{
// op: op1
// meta: { start_ts: ... , end_ts: ..., user_id: ... }
// }, {
// op: op2
// meta: { start_ts: ... , end_ts: ..., user_id: ... }
// }]
convertToSingleOpUpdates(updates) {
const splitUpdates = [];
for (let update of Array.from(updates)) {
// Reject any non-insert or delete ops, i.e. comments
const ops = update.op.filter(o => (o.i != null) || (o.d != null));
if (ops.length === 0) {
splitUpdates.push({
op: UpdateCompressor.NOOP,
meta: {
start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id
},
v: update.v
else
for op in ops
splitUpdates.push
op: op
meta:
start_ts: update.meta.start_ts or update.meta.ts
end_ts: update.meta.end_ts or update.meta.ts
});
} else {
for (let op of Array.from(ops)) {
splitUpdates.push({
op,
meta: {
start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id
},
v: update.v
return splitUpdates
});
}
}
}
return splitUpdates;
},
concatUpdatesWithSameVersion: (updates) ->
concattedUpdates = []
for update in updates
lastUpdate = concattedUpdates[concattedUpdates.length - 1]
if lastUpdate? and lastUpdate.v == update.v
lastUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP
else
nextUpdate =
op: []
meta: update.meta
concatUpdatesWithSameVersion(updates) {
const concattedUpdates = [];
for (let update of Array.from(updates)) {
const lastUpdate = concattedUpdates[concattedUpdates.length - 1];
if ((lastUpdate != null) && (lastUpdate.v === update.v)) {
if (update.op !== UpdateCompressor.NOOP) { lastUpdate.op.push(update.op); }
} else {
const nextUpdate = {
op: [],
meta: update.meta,
v: update.v
nextUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP
concattedUpdates.push nextUpdate
return concattedUpdates
};
if (update.op !== UpdateCompressor.NOOP) { nextUpdate.op.push(update.op); }
concattedUpdates.push(nextUpdate);
}
}
return concattedUpdates;
},
compressRawUpdates: (lastPreviousUpdate, rawUpdates) ->
if lastPreviousUpdate?.op?.length > 1
# if the last previous update was an array op, don't compress onto it.
# This avoids cases where the array length changes but the version number doesn't
return [lastPreviousUpdate].concat UpdateCompressor.compressRawUpdates(null,rawUpdates)
if lastPreviousUpdate?
rawUpdates = [lastPreviousUpdate].concat(rawUpdates)
updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates)
updates = UpdateCompressor.compressUpdates(updates)
return UpdateCompressor.concatUpdatesWithSameVersion(updates)
compressRawUpdates(lastPreviousUpdate, rawUpdates) {
if (__guard__(lastPreviousUpdate != null ? lastPreviousUpdate.op : undefined, x => x.length) > 1) {
// if the last previous update was an array op, don't compress onto it.
// This avoids cases where the array length changes but the version number doesn't
return [lastPreviousUpdate].concat(UpdateCompressor.compressRawUpdates(null,rawUpdates));
}
if (lastPreviousUpdate != null) {
rawUpdates = [lastPreviousUpdate].concat(rawUpdates);
}
let updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates);
updates = UpdateCompressor.compressUpdates(updates);
return UpdateCompressor.concatUpdatesWithSameVersion(updates);
},
compressUpdates: (updates) ->
return [] if updates.length == 0
compressUpdates(updates) {
if (updates.length === 0) { return []; }
compressedUpdates = [updates.shift()]
for update in updates
lastCompressedUpdate = compressedUpdates.pop()
if lastCompressedUpdate?
compressedUpdates = compressedUpdates.concat UpdateCompressor._concatTwoUpdates lastCompressedUpdate, update
else
compressedUpdates.push update
return compressedUpdates
MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000
MAX_UPDATE_SIZE: twoMegabytes = 2* 1024 * 1024
_concatTwoUpdates: (firstUpdate, secondUpdate) ->
firstUpdate =
op: firstUpdate.op
meta:
user_id: firstUpdate.meta.user_id or null
start_ts: firstUpdate.meta.start_ts or firstUpdate.meta.ts
end_ts: firstUpdate.meta.end_ts or firstUpdate.meta.ts
v: firstUpdate.v
secondUpdate =
op: secondUpdate.op
meta:
user_id: secondUpdate.meta.user_id or null
start_ts: secondUpdate.meta.start_ts or secondUpdate.meta.ts
end_ts: secondUpdate.meta.end_ts or secondUpdate.meta.ts
v: secondUpdate.v
if firstUpdate.meta.user_id != secondUpdate.meta.user_id
return [firstUpdate, secondUpdate]
if secondUpdate.meta.start_ts - firstUpdate.meta.end_ts > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES
return [firstUpdate, secondUpdate]
firstOp = firstUpdate.op
secondOp = secondUpdate.op
firstSize = firstOp.i?.length or firstOp.d?.length
secondSize = secondOp.i?.length or secondOp.d?.length
# Two inserts
if firstOp.i? and secondOp.i? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
v: secondUpdate.v
]
# Two deletes
else if firstOp.d? and secondOp.d? and secondOp.p <= firstOp.p <= (secondOp.p + secondOp.d.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: secondOp.p
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
v: secondUpdate.v
]
# An insert and then a delete
else if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length)
offset = secondOp.p - firstOp.p
insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
# Only trim the insert when the delete is fully contained within it
if insertedText == secondOp.d
insert = strRemove(firstOp.i, offset, secondOp.d.length)
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: insert
v: secondUpdate.v
]
else
# This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate]
# A delete then an insert at the same place, likely a copy-paste of a chunk of content
else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p
offset = firstOp.p
diff_ops = @diffAsShareJsOps(firstOp.d, secondOp.i)
if diff_ops.length == 0
return [{ # Noop
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: ""
v: secondUpdate.v
}]
else
return diff_ops.map (op) ->
op.p += offset
return {
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op: op
v: secondUpdate.v
let compressedUpdates = [updates.shift()];
for (let update of Array.from(updates)) {
const lastCompressedUpdate = compressedUpdates.pop();
if (lastCompressedUpdate != null) {
compressedUpdates = compressedUpdates.concat(UpdateCompressor._concatTwoUpdates(lastCompressedUpdate, update));
} else {
compressedUpdates.push(update);
}
}
else
return [firstUpdate, secondUpdate]
return compressedUpdates;
},
ADDED: 1
REMOVED: -1
UNCHANGED: 0
diffAsShareJsOps: (before, after, callback = (error, ops) ->) ->
diffs = dmp.diff_main(before, after)
dmp.diff_cleanupSemantic(diffs)
MAX_TIME_BETWEEN_UPDATES: (oneMinute = 60 * 1000),
MAX_UPDATE_SIZE: (twoMegabytes = 2* 1024 * 1024),
ops = []
position = 0
for diff in diffs
type = diff[0]
content = diff[1]
if type == @ADDED
ops.push
i: content
_concatTwoUpdates(firstUpdate, secondUpdate) {
let offset;
firstUpdate = {
op: firstUpdate.op,
meta: {
user_id: firstUpdate.meta.user_id || null,
start_ts: firstUpdate.meta.start_ts || firstUpdate.meta.ts,
end_ts: firstUpdate.meta.end_ts || firstUpdate.meta.ts
},
v: firstUpdate.v
};
secondUpdate = {
op: secondUpdate.op,
meta: {
user_id: secondUpdate.meta.user_id || null,
start_ts: secondUpdate.meta.start_ts || secondUpdate.meta.ts,
end_ts: secondUpdate.meta.end_ts || secondUpdate.meta.ts
},
v: secondUpdate.v
};
if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
return [firstUpdate, secondUpdate];
}
if ((secondUpdate.meta.start_ts - firstUpdate.meta.end_ts) > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES) {
return [firstUpdate, secondUpdate];
}
const firstOp = firstUpdate.op;
const secondOp = secondUpdate.op;
const firstSize = (firstOp.i != null ? firstOp.i.length : undefined) || (firstOp.d != null ? firstOp.d.length : undefined);
const secondSize = (secondOp.i != null ? secondOp.i.length : undefined) || (secondOp.d != null ? secondOp.d.length : undefined);
// Two inserts
if ((firstOp.i != null) && (secondOp.i != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
},
v: secondUpdate.v
}
];
// Two deletes
} else if ((firstOp.d != null) && (secondOp.d != null) && (secondOp.p <= firstOp.p && firstOp.p <= (secondOp.p + secondOp.d.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: secondOp.p,
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
},
v: secondUpdate.v
}
];
// An insert and then a delete
} else if ((firstOp.i != null) && (secondOp.d != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length))) {
offset = secondOp.p - firstOp.p;
const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length);
// Only trim the insert when the delete is fully contained within it
if (insertedText === secondOp.d) {
const insert = strRemove(firstOp.i, offset, secondOp.d.length);
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: insert
},
v: secondUpdate.v
}
];
} else {
// This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate];
}
// A delete then an insert at the same place, likely a copy-paste of a chunk of content
} else if ((firstOp.d != null) && (secondOp.i != null) && (firstOp.p === secondOp.p)) {
offset = firstOp.p;
const diff_ops = this.diffAsShareJsOps(firstOp.d, secondOp.i);
if (diff_ops.length === 0) {
return [{ // Noop
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: ""
},
v: secondUpdate.v
}];
} else {
return diff_ops.map(function(op) {
op.p += offset;
return {
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op,
v: secondUpdate.v
};});
}
} else {
return [firstUpdate, secondUpdate];
}
},
ADDED: 1,
REMOVED: -1,
UNCHANGED: 0,
diffAsShareJsOps(before, after, callback) {
if (callback == null) { callback = function(error, ops) {}; }
const diffs = dmp.diff_main(before, after);
dmp.diff_cleanupSemantic(diffs);
const ops = [];
let position = 0;
for (let diff of Array.from(diffs)) {
const type = diff[0];
const content = diff[1];
if (type === this.ADDED) {
ops.push({
i: content,
p: position
position += content.length
else if type == @REMOVED
ops.push
d: content
});
position += content.length;
} else if (type === this.REMOVED) {
ops.push({
d: content,
p: position
else if type == @UNCHANGED
position += content.length
else
throw "Unknown type"
return ops
});
} else if (type === this.UNCHANGED) {
position += content.length;
} else {
throw "Unknown type";
}
}
return ops;
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
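A worked sketch of the format conversion described at the top of UpdateCompressor; the positions, timestamps and user id below are made up, but the input and output shapes follow the comment and the code above:

const UpdateCompressor = require("./UpdateCompressor");

// one doc-updater update carrying two ops...
const rawUpdate = {
  op: [{ p: 0, i: "foo" }, { p: 6, d: "bar" }],
  meta: { ts: 1000, user_id: "user-1" },
  v: 42
};

// ...becomes one compressed-format update per op, sharing version and metadata
const split = UpdateCompressor.convertToSingleOpUpdates([rawUpdate]);
// [ { op: { p: 0, i: "foo" }, meta: { start_ts: 1000, end_ts: 1000, user_id: "user-1" }, v: 42 },
//   { op: { p: 6, d: "bar" }, meta: { start_ts: 1000, end_ts: 1000, user_id: "user-1" }, v: 42 } ]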

View file

@ -1,23 +1,44 @@
MongoManager = require "./MongoManager"
WebApiManager = require "./WebApiManager"
logger = require "logger-sharelatex"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let UpdateTrimmer;
const MongoManager = require("./MongoManager");
const WebApiManager = require("./WebApiManager");
const logger = require("logger-sharelatex");
module.exports = UpdateTrimmer =
shouldTrimUpdates: (project_id, callback = (error, shouldTrim) ->) ->
MongoManager.getProjectMetaData project_id, (error, metadata) ->
return callback(error) if error?
if metadata?.preserveHistory
return callback null, false
else
WebApiManager.getProjectDetails project_id, (error, details) ->
return callback(error) if error?
logger.log project_id: project_id, details: details, "got details"
if details?.features?.versioning
MongoManager.setProjectMetaData project_id, preserveHistory: true, (error) ->
return callback(error) if error?
MongoManager.upgradeHistory project_id, (error) ->
return callback(error) if error?
callback null, false
else
callback null, true
module.exports = (UpdateTrimmer = {
shouldTrimUpdates(project_id, callback) {
if (callback == null) { callback = function(error, shouldTrim) {}; }
return MongoManager.getProjectMetaData(project_id, function(error, metadata) {
if (error != null) { return callback(error); }
if (metadata != null ? metadata.preserveHistory : undefined) {
return callback(null, false);
} else {
return WebApiManager.getProjectDetails(project_id, function(error, details) {
if (error != null) { return callback(error); }
logger.log({project_id, details}, "got details");
if (__guard__(details != null ? details.features : undefined, x => x.versioning)) {
return MongoManager.setProjectMetaData(project_id, {preserveHistory: true}, function(error) {
if (error != null) { return callback(error); }
return MongoManager.upgradeHistory(project_id, function(error) {
if (error != null) { return callback(error); }
return callback(null, false);
});
});
} else {
return callback(null, true);
}
});
}
});
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
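A short sketch of the decision above from a caller's point of view (the wrapper is illustrative; project_id is assumed to be supplied):

const UpdateTrimmer = require("./UpdateTrimmer");

function logTrimDecision(project_id, callback) {
  return UpdateTrimmer.shouldTrimUpdates(project_id, function(error, shouldTrim) {
    if (error != null) { return callback(error); }
    // shouldTrim === false: project has preserveHistory metadata or the
    //   versioning feature, so packs are kept permanently
    // shouldTrim === true: neither applies, so packs stay temporary (TTL)
    return callback(null, shouldTrim);
  });
}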

View file

@ -1,344 +1,494 @@
MongoManager = require "./MongoManager"
PackManager = require "./PackManager"
RedisManager = require "./RedisManager"
UpdateCompressor = require "./UpdateCompressor"
LockManager = require "./LockManager"
WebApiManager = require "./WebApiManager"
UpdateTrimmer = require "./UpdateTrimmer"
logger = require "logger-sharelatex"
async = require "async"
_ = require "underscore"
Settings = require "settings-sharelatex"
keys = Settings.redis.lock.key_schema
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let fiveMinutes, UpdatesManager;
const MongoManager = require("./MongoManager");
const PackManager = require("./PackManager");
const RedisManager = require("./RedisManager");
const UpdateCompressor = require("./UpdateCompressor");
const LockManager = require("./LockManager");
const WebApiManager = require("./WebApiManager");
const UpdateTrimmer = require("./UpdateTrimmer");
const logger = require("logger-sharelatex");
const async = require("async");
const _ = require("underscore");
const Settings = require("settings-sharelatex");
const keys = Settings.redis.lock.key_schema;
module.exports = UpdatesManager =
compressAndSaveRawUpdates: (project_id, doc_id, rawUpdates, temporary, callback = (error) ->) ->
length = rawUpdates.length
if length == 0
return callback()
module.exports = (UpdatesManager = {
compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, callback) {
let i;
if (callback == null) { callback = function(error) {}; }
const { length } = rawUpdates;
if (length === 0) {
return callback();
}
# check that ops are in the correct order
for op, i in rawUpdates when i > 0
thisVersion = op?.v
prevVersion = rawUpdates[i-1]?.v
if not (prevVersion < thisVersion)
logger.error project_id: project_id, doc_id: doc_id, rawUpdates:rawUpdates, temporary: temporary, thisVersion:thisVersion, prevVersion:prevVersion, "op versions out of order"
// check that ops are in the correct order
for (i = 0; i < rawUpdates.length; i++) {
const op = rawUpdates[i];
if (i > 0) {
const thisVersion = op != null ? op.v : undefined;
const prevVersion = __guard__(rawUpdates[i-1], x => x.v);
if (!(prevVersion < thisVersion)) {
logger.error({project_id, doc_id, rawUpdates, temporary, thisVersion, prevVersion}, "op versions out of order");
}
}
}
# FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it
# CORRECTION: we do use it to log the time in case of error
MongoManager.peekLastCompressedUpdate doc_id, (error, lastCompressedUpdate, lastVersion) ->
# lastCompressedUpdate is the most recent update in Mongo, and
# lastVersion is its sharejs version number.
#
# The peekLastCompressedUpdate method may pass the update back
# as 'null' (for example if the previous compressed update has
# been archived). In this case it can still pass back the
# lastVersion from the update to allow us to check consistency.
return callback(error) if error?
// FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it
// CORRECTION: we do use it to log the time in case of error
return MongoManager.peekLastCompressedUpdate(doc_id, function(error, lastCompressedUpdate, lastVersion) {
// lastCompressedUpdate is the most recent update in Mongo, and
// lastVersion is its sharejs version number.
//
// The peekLastCompressedUpdate method may pass the update back
// as 'null' (for example if the previous compressed update has
// been archived). In this case it can still pass back the
// lastVersion from the update to allow us to check consistency.
let op;
if (error != null) { return callback(error); }
# Ensure that raw updates start where lastVersion left off
if lastVersion?
discardedUpdates = []
rawUpdates = rawUpdates.slice(0)
while rawUpdates[0]? and rawUpdates[0].v <= lastVersion
discardedUpdates.push rawUpdates.shift()
if discardedUpdates.length
logger.error project_id: project_id, doc_id: doc_id, discardedUpdates: discardedUpdates, temporary: temporary, lastVersion: lastVersion, "discarded updates already present"
// Ensure that raw updates start where lastVersion left off
if (lastVersion != null) {
const discardedUpdates = [];
rawUpdates = rawUpdates.slice(0);
while ((rawUpdates[0] != null) && (rawUpdates[0].v <= lastVersion)) {
discardedUpdates.push(rawUpdates.shift());
}
if (discardedUpdates.length) {
logger.error({project_id, doc_id, discardedUpdates, temporary, lastVersion}, "discarded updates already present");
}
if rawUpdates[0]? and rawUpdates[0].v != lastVersion + 1
ts = lastCompressedUpdate?.meta?.end_ts
last_timestamp = if ts? then new Date(ts) else 'unknown time'
error = new Error("Tried to apply raw op at version #{rawUpdates[0].v} to last compressed update with version #{lastVersion} from #{last_timestamp}")
logger.error err: error, doc_id: doc_id, project_id: project_id, prev_end_ts: ts, temporary: temporary, lastCompressedUpdate: lastCompressedUpdate, "inconsistent doc versions"
if Settings.trackchanges?.continueOnError and rawUpdates[0].v > lastVersion + 1
# we have lost some ops - continue to write into the database, we can't recover at this point
lastCompressedUpdate = null
else
return callback error
if ((rawUpdates[0] != null) && (rawUpdates[0].v !== (lastVersion + 1))) {
const ts = __guard__(lastCompressedUpdate != null ? lastCompressedUpdate.meta : undefined, x1 => x1.end_ts);
const last_timestamp = (ts != null) ? new Date(ts) : 'unknown time';
error = new Error(`Tried to apply raw op at version ${rawUpdates[0].v} to last compressed update with version ${lastVersion} from ${last_timestamp}`);
logger.error({err: error, doc_id, project_id, prev_end_ts: ts, temporary, lastCompressedUpdate}, "inconsistent doc versions");
if ((Settings.trackchanges != null ? Settings.trackchanges.continueOnError : undefined) && (rawUpdates[0].v > (lastVersion + 1))) {
// we have lost some ops - continue to write into the database, we can't recover at this point
lastCompressedUpdate = null;
} else {
return callback(error);
}
}
}
if rawUpdates.length == 0
return callback()
if (rawUpdates.length === 0) {
return callback();
}
# some old large ops in redis need to be rejected, they predate
# the size limit that now prevents them going through the system
REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024
for rawUpdate in rawUpdates
opSizes = ((op.i?.length || op.d?.length) for op in rawUpdate?.op or [])
size = _.max opSizes
if size > REJECT_LARGE_OP_SIZE
error = new Error("dropped op exceeding maximum allowed size of #{REJECT_LARGE_OP_SIZE}")
logger.error err: error, doc_id: doc_id, project_id: project_id, size: size, rawUpdate: rawUpdate, "dropped op - too big"
rawUpdate.op = []
// some old large ops in redis need to be rejected, they predate
// the size limit that now prevents them going through the system
const REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024;
for (var rawUpdate of Array.from(rawUpdates)) {
const opSizes = ((() => {
const result = [];
for (op of Array.from((rawUpdate != null ? rawUpdate.op : undefined) || [])) { result.push(((op.i != null ? op.i.length : undefined) || (op.d != null ? op.d.length : undefined)));
}
return result;
})());
const size = _.max(opSizes);
if (size > REJECT_LARGE_OP_SIZE) {
error = new Error(`dropped op exceeding maximum allowed size of ${REJECT_LARGE_OP_SIZE}`);
logger.error({err: error, doc_id, project_id, size, rawUpdate}, "dropped op - too big");
rawUpdate.op = [];
}
}
compressedUpdates = UpdateCompressor.compressRawUpdates null, rawUpdates
PackManager.insertCompressedUpdates project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, (error, result) ->
return callback(error) if error?
logger.log {project_id, doc_id, orig_v: lastCompressedUpdate?.v, new_v: result.v}, "inserted updates into pack" if result?
callback()
const compressedUpdates = UpdateCompressor.compressRawUpdates(null, rawUpdates);
return PackManager.insertCompressedUpdates(project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, function(error, result) {
if (error != null) { return callback(error); }
if (result != null) { logger.log({project_id, doc_id, orig_v: (lastCompressedUpdate != null ? lastCompressedUpdate.v : undefined), new_v: result.v}, "inserted updates into pack"); }
return callback();
});
});
},
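// For illustration of the consistency check above: with lastVersion = 41,
// a batch [{v: 42, ...}, {v: 43, ...}] is accepted, whereas a batch starting
// at v: 44 triggers the "Tried to apply raw op at version ..." error and is
// only written anyway when Settings.trackchanges.continueOnError is set
// (the gap is then accepted as lost ops).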
# Check whether the updates are temporary (per-project property)
_prepareProjectForUpdates: (project_id, callback = (error, temporary) ->) ->
UpdateTrimmer.shouldTrimUpdates project_id, (error, temporary) ->
return callback(error) if error?
callback(null, temporary)
// Check whether the updates are temporary (per-project property)
_prepareProjectForUpdates(project_id, callback) {
if (callback == null) { callback = function(error, temporary) {}; }
return UpdateTrimmer.shouldTrimUpdates(project_id, function(error, temporary) {
if (error != null) { return callback(error); }
return callback(null, temporary);
});
},
# Check for project id on document history (per-document property)
_prepareDocForUpdates: (project_id, doc_id, callback = (error) ->) ->
MongoManager.backportProjectId project_id, doc_id, (error) ->
return callback(error) if error?
callback(null)
// Check for project id on document history (per-document property)
_prepareDocForUpdates(project_id, doc_id, callback) {
if (callback == null) { callback = function(error) {}; }
return MongoManager.backportProjectId(project_id, doc_id, function(error) {
if (error != null) { return callback(error); }
return callback(null);
});
},
# Apply updates for specific project/doc after preparing at project and doc level
REDIS_READ_BATCH_SIZE: 100
processUncompressedUpdates: (project_id, doc_id, temporary, callback = (error) ->) ->
# get the updates as strings from redis (so we can delete them after they are applied)
RedisManager.getOldestDocUpdates doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, (error, docUpdates) ->
return callback(error) if error?
length = docUpdates.length
# parse the redis strings into ShareJs updates
RedisManager.expandDocUpdates docUpdates, (error, rawUpdates) ->
if error?
logger.err project_id: project_id, doc_id: doc_id, docUpdates: docUpdates, "failed to parse docUpdates"
return callback(error)
logger.log project_id: project_id, doc_id: doc_id, rawUpdates: rawUpdates, "retrieved raw updates from redis"
UpdatesManager.compressAndSaveRawUpdates project_id, doc_id, rawUpdates, temporary, (error) ->
return callback(error) if error?
logger.log project_id: project_id, doc_id: doc_id, "compressed and saved doc updates"
# delete the applied updates from redis
RedisManager.deleteAppliedDocUpdates project_id, doc_id, docUpdates, (error) ->
return callback(error) if error?
if length == UpdatesManager.REDIS_READ_BATCH_SIZE
# There might be more updates
logger.log project_id: project_id, doc_id: doc_id, "continuing processing updates"
setTimeout () ->
UpdatesManager.processUncompressedUpdates project_id, doc_id, temporary, callback
, 0
else
logger.log project_id: project_id, doc_id: doc_id, "all raw updates processed"
callback()
// Apply updates for specific project/doc after preparing at project and doc level
REDIS_READ_BATCH_SIZE: 100,
processUncompressedUpdates(project_id, doc_id, temporary, callback) {
// get the updates as strings from redis (so we can delete them after they are applied)
if (callback == null) { callback = function(error) {}; }
return RedisManager.getOldestDocUpdates(doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, function(error, docUpdates) {
if (error != null) { return callback(error); }
const { length } = docUpdates;
// parse the redis strings into ShareJs updates
return RedisManager.expandDocUpdates(docUpdates, function(error, rawUpdates) {
if (error != null) {
logger.err({project_id, doc_id, docUpdates}, "failed to parse docUpdates");
return callback(error);
}
logger.log({project_id, doc_id, rawUpdates}, "retrieved raw updates from redis");
return UpdatesManager.compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, function(error) {
if (error != null) { return callback(error); }
logger.log({project_id, doc_id}, "compressed and saved doc updates");
// delete the applied updates from redis
return RedisManager.deleteAppliedDocUpdates(project_id, doc_id, docUpdates, function(error) {
if (error != null) { return callback(error); }
if (length === UpdatesManager.REDIS_READ_BATCH_SIZE) {
// There might be more updates
logger.log({project_id, doc_id}, "continuing processing updates");
return setTimeout(() => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, callback)
, 0);
} else {
logger.log({project_id, doc_id}, "all raw updates processed");
return callback();
}
});
});
});
});
},
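The batch-drain shape used by processUncompressedUpdates above (read at most REDIS_READ_BATCH_SIZE raw updates, apply them, delete only what was applied, and re-enter via setTimeout when a full batch came back) can be sketched on its own. This is a minimal sketch assuming a plain ioredis client and a made-up list key; it is not the RedisManager API used here, which deletes the specific applied entries rather than trimming.

// Minimal batch-drain loop over a Redis list (hypothetical key, not this codebase's schema).
const Redis = require("ioredis");
const redis = new Redis();
const BATCH_SIZE = 100;

function drainQueue(key, handleBatch, callback) {
  // read up to BATCH_SIZE entries from the head of the list
  redis.lrange(key, 0, BATCH_SIZE - 1, (error, entries) => {
    if (error != null) { return callback(error); }
    if (entries.length === 0) { return callback(); } // nothing left to process
    handleBatch(entries, (error) => {
      if (error != null) { return callback(error); }
      // drop only the entries we just processed
      redis.ltrim(key, entries.length, -1, (error) => {
        if (error != null) { return callback(error); }
        if (entries.length === BATCH_SIZE) {
          // a full batch suggests more work is queued; yield, then continue
          return setTimeout(() => drainQueue(key, handleBatch, callback), 0);
        }
        return callback();
      });
    });
  });
}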
# Process updates for a doc when we flush it individually
processUncompressedUpdatesWithLock: (project_id, doc_id, callback = (error) ->) ->
UpdatesManager._prepareProjectForUpdates project_id, (error, temporary) ->
return callback(error) if error?
UpdatesManager._processUncompressedUpdatesForDocWithLock project_id, doc_id, temporary, callback
// Process updates for a doc when we flush it individually
processUncompressedUpdatesWithLock(project_id, doc_id, callback) {
if (callback == null) { callback = function(error) {}; }
return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
if (error != null) { return callback(error); }
return UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback);
});
},
# Process updates for a doc when the whole project is flushed (internal method)
_processUncompressedUpdatesForDocWithLock: (project_id, doc_id, temporary, callback = (error) ->) ->
UpdatesManager._prepareDocForUpdates project_id, doc_id, (error) ->
return callback(error) if error?
LockManager.runWithLock(
// Process updates for a doc when the whole project is flushed (internal method)
_processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback) {
if (callback == null) { callback = function(error) {}; }
return UpdatesManager._prepareDocForUpdates(project_id, doc_id, function(error) {
if (error != null) { return callback(error); }
return LockManager.runWithLock(
keys.historyLock({doc_id}),
(releaseLock) ->
UpdatesManager.processUncompressedUpdates project_id, doc_id, temporary, releaseLock
releaseLock => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, releaseLock),
callback
);
});
},
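The contract assumed of LockManager.runWithLock above is: acquire the named lock, hand the worker a releaseLock callback, and make sure the lock is released before the caller's callback fires. A toy in-process sketch of that shape (the real LockManager is Redis-backed and handles expiry and timeouts):

// Illustrative runWithLock(key, worker, callback); in-memory only, no TTL handling.
const locks = new Set();

function runWithLock(key, worker, callback) {
  if (locks.has(key)) {
    return callback(new Error(`lock already held: ${key}`));
  }
  locks.add(key);
  // the worker is given a release callback with the usual (error, ...results) shape
  worker((error, ...results) => {
    locks.delete(key); // always release, even on error
    callback(error, ...results);
  });
}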
// Process all updates for a project, only check project-level information once
processUncompressedUpdatesForProject(project_id, callback) {
if (callback == null) { callback = function(error) {}; }
return RedisManager.getDocIdsWithHistoryOps(project_id, function(error, doc_ids) {
if (error != null) { return callback(error); }
return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
const jobs = [];
for (let doc_id of Array.from(doc_ids)) {
(doc_id =>
jobs.push(cb => UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, cb))
)(doc_id);
}
return async.parallelLimit(jobs, 5, callback);
});
});
},
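The per-project fan-out above caps concurrency with async.parallelLimit(jobs, 5, callback), so at most five docs are flushed at once. A self-contained example of that call shape, with made-up task bodies:

// async.parallelLimit runs at most `limit` tasks at a time and collects results in task order.
const async = require("async");

const jobs = [1, 2, 3, 4, 5, 6].map((n) => (cb) => {
  // each task must call its callback exactly once: cb(error, result)
  setTimeout(() => cb(null, n * 2), 10);
});

async.parallelLimit(jobs, 5, (error, results) => {
  if (error != null) { throw error; }
  console.log(results); // [2, 4, 6, 8, 10, 12]
});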
// flush all outstanding changes
flushAll(limit, callback) {
if (callback == null) { callback = function(error, result) {}; }
return RedisManager.getProjectIdsWithHistoryOps(function(error, project_ids) {
let project_id;
if (error != null) { return callback(error); }
logger.log({count: (project_ids != null ? project_ids.length : undefined), project_ids}, "found projects");
const jobs = [];
project_ids = _.shuffle(project_ids); // randomise to avoid hitting same projects each time
const selectedProjects = limit < 0 ? project_ids : project_ids.slice(0, limit);
for (project_id of Array.from(selectedProjects)) {
(project_id =>
jobs.push(cb =>
UpdatesManager.processUncompressedUpdatesForProject(project_id, err => cb(null, {failed: (err != null), project_id}))
)
)(project_id);
}
return async.series(jobs, function(error, result) {
let x;
if (error != null) { return callback(error); }
const failedProjects = ((() => {
const result1 = [];
for (x of Array.from(result)) { if (x.failed) {
result1.push(x.project_id);
}
}
return result1;
})());
const succeededProjects = ((() => {
const result2 = [];
for (x of Array.from(result)) { if (!x.failed) {
result2.push(x.project_id);
}
}
return result2;
})());
return callback(null, {failed: failedProjects, succeeded: succeededProjects, all: project_ids});
});
});
},
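A caller of flushAll only needs the {failed, succeeded, all} result shape produced above; a hedged usage sketch (the limit of 10 and the require path are illustrative):

// Flush up to 10 randomly selected projects and report the outcome.
const UpdatesManager = require("./UpdatesManager"); // path is illustrative
UpdatesManager.flushAll(10, (error, result) => {
  if (error != null) { return console.error(error); }
  console.log(`flushed ${result.succeeded.length} of ${result.all.length} projects with pending ops`);
  if (result.failed.length > 0) {
    console.log("failed project ids:", result.failed);
  }
});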
# Process all updates for a project, only check project-level information once
processUncompressedUpdatesForProject: (project_id, callback = (error) ->) ->
RedisManager.getDocIdsWithHistoryOps project_id, (error, doc_ids) ->
return callback(error) if error?
UpdatesManager._prepareProjectForUpdates project_id, (error, temporary) ->
jobs = []
for doc_id in doc_ids
do (doc_id) ->
jobs.push (cb) ->
UpdatesManager._processUncompressedUpdatesForDocWithLock project_id, doc_id, temporary, cb
async.parallelLimit jobs, 5, callback
getDanglingUpdates(callback) {
if (callback == null) { callback = function(error, doc_ids) {}; }
return RedisManager.getAllDocIdsWithHistoryOps(function(error, all_doc_ids) {
if (error != null) { return callback(error); }
return RedisManager.getProjectIdsWithHistoryOps(function(error, all_project_ids) {
if (error != null) { return callback(error); }
// function to get doc_ids for each project
const task = cb => async.concatSeries(all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb);
// find the dangling doc ids
return task(function(error, project_doc_ids) {
const dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids);
logger.log({all_doc_ids, all_project_ids, project_doc_ids, dangling_doc_ids}, "checking for dangling doc ids");
return callback(null, dangling_doc_ids);
});
});
});
},
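The "dangling" set above is a plain set difference: doc ids that have history ops queued but are not reachable from any project id. The underscore call doing the work behaves like this:

// _.difference keeps the values of the first array that are absent from the second.
const _ = require("underscore");

const all_doc_ids = ["doc-a", "doc-b", "doc-c"];
const project_doc_ids = ["doc-a", "doc-c"];

console.log(_.difference(all_doc_ids, project_doc_ids)); // ["doc-b"], the dangling doc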
# flush all outstanding changes
flushAll: (limit, callback = (error, result) ->) ->
RedisManager.getProjectIdsWithHistoryOps (error, project_ids) ->
return callback(error) if error?
logger.log {count: project_ids?.length, project_ids: project_ids}, "found projects"
jobs = []
project_ids = _.shuffle project_ids # randomise to avoid hitting same projects each time
selectedProjects = if limit < 0 then project_ids else project_ids[0...limit]
for project_id in selectedProjects
do (project_id) ->
jobs.push (cb) ->
UpdatesManager.processUncompressedUpdatesForProject project_id, (err) ->
return cb(null, {failed: err?, project_id: project_id})
async.series jobs, (error, result) ->
return callback(error) if error?
failedProjects = (x.project_id for x in result when x.failed)
succeededProjects = (x.project_id for x in result when not x.failed)
callback(null, {failed: failedProjects, succeeded: succeededProjects, all: project_ids})
getDocUpdates(project_id, doc_id, options, callback) {
if (options == null) { options = {}; }
if (callback == null) { callback = function(error, updates) {}; }
return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
if (error != null) { return callback(error); }
//console.log "options", options
return PackManager.getOpsByVersionRange(project_id, doc_id, options.from, options.to, function(error, updates) {
if (error != null) { return callback(error); }
return callback(null, updates);
});
});
},
getDanglingUpdates: (callback = (error, doc_ids) ->) ->
RedisManager.getAllDocIdsWithHistoryOps (error, all_doc_ids) ->
return callback(error) if error?
RedisManager.getProjectIdsWithHistoryOps (error, all_project_ids) ->
return callback(error) if error?
# function to get doc_ids for each project
task = (cb) -> async.concatSeries all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb
# find the dangling doc ids
task (error, project_doc_ids) ->
dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids)
logger.log {all_doc_ids: all_doc_ids, all_project_ids: all_project_ids, project_doc_ids: project_doc_ids, dangling_doc_ids: dangling_doc_ids}, "checking for dangling doc ids"
callback(null, dangling_doc_ids)
getDocUpdatesWithUserInfo(project_id, doc_id, options, callback) {
if (options == null) { options = {}; }
if (callback == null) { callback = function(error, updates) {}; }
return UpdatesManager.getDocUpdates(project_id, doc_id, options, function(error, updates) {
if (error != null) { return callback(error); }
return UpdatesManager.fillUserInfo(updates, function(error, updates) {
if (error != null) { return callback(error); }
return callback(null, updates);
});
});
},
getDocUpdates: (project_id, doc_id, options = {}, callback = (error, updates) ->) ->
UpdatesManager.processUncompressedUpdatesWithLock project_id, doc_id, (error) ->
return callback(error) if error?
#console.log "options", options
PackManager.getOpsByVersionRange project_id, doc_id, options.from, options.to, (error, updates) ->
return callback(error) if error?
callback null, updates
getSummarizedProjectUpdates(project_id, options, callback) {
if (options == null) { options = {}; }
if (callback == null) { callback = function(error, updates) {}; }
if (!options.min_count) { options.min_count = 25; }
let summarizedUpdates = [];
const { before } = options;
let nextBeforeTimestamp = null;
return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
if (error != null) { return callback(error); }
return PackManager.makeProjectIterator(project_id, before, function(err, iterator) {
if (err != null) { return callback(err); }
// repeatedly get updates and pass them through the summariser to get a final output with user info
return async.whilst(() =>
//console.log "checking iterator.done", iterator.done()
(summarizedUpdates.length < options.min_count) && !iterator.done()
getDocUpdatesWithUserInfo: (project_id, doc_id, options = {}, callback = (error, updates) ->) ->
UpdatesManager.getDocUpdates project_id, doc_id, options, (error, updates) ->
return callback(error) if error?
UpdatesManager.fillUserInfo updates, (error, updates) ->
return callback(error) if error?
callback null, updates
, cb =>
iterator.next(function(err, partialUpdates) {
if (err != null) { return callback(err); }
//logger.log {partialUpdates}, 'got partialUpdates'
if (partialUpdates.length === 0) { return cb(); } // FIXME should try to avoid this happening
nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts;
// add the updates to the summary list
summarizedUpdates = UpdatesManager._summarizeUpdates(partialUpdates, summarizedUpdates);
return cb();
})
getSummarizedProjectUpdates: (project_id, options = {}, callback = (error, updates) ->) ->
options.min_count ||= 25
summarizedUpdates = []
before = options.before
nextBeforeTimestamp = null
UpdatesManager.processUncompressedUpdatesForProject project_id, (error) ->
return callback(error) if error?
PackManager.makeProjectIterator project_id, before, (err, iterator) ->
return callback(err) if err?
# repeatedly get updates and pass them through the summariser to get a final output with user info
async.whilst () ->
#console.log "checking iterator.done", iterator.done()
return summarizedUpdates.length < options.min_count and not iterator.done()
, (cb) ->
iterator.next (err, partialUpdates) ->
return callback(err) if err?
#logger.log {partialUpdates}, 'got partialUpdates'
return cb() if partialUpdates.length is 0 ## FIXME should try to avoid this happening
nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts
# add the updates to the summary list
summarizedUpdates = UpdatesManager._summarizeUpdates partialUpdates, summarizedUpdates
cb()
, () ->
# finally, done with all updates
#console.log 'summarized Updates', summarizedUpdates
UpdatesManager.fillSummarizedUserInfo summarizedUpdates, (err, results) ->
return callback(err) if err?
callback null, results, if not iterator.done() then nextBeforeTimestamp else undefined
, () =>
// finally, done with all updates
//console.log 'summarized Updates', summarizedUpdates
UpdatesManager.fillSummarizedUserInfo(summarizedUpdates, function(err, results) {
if (err != null) { return callback(err); }
return callback(null, results, !iterator.done() ? nextBeforeTimestamp : undefined);
})
);
});
});
},
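The summarisation loop above is the async.whilst shape used by the async@2 dependency in this service: a synchronous test, an asynchronous body, and a final callback. Stripped of the pack iterator details, the control flow is roughly the following sketch (iterator, summarize and minCount are placeholders):

// Keep pulling batches until we have enough summaries or the iterator is exhausted.
const async = require("async");

function collectSummaries(iterator, minCount, summarize, callback) {
  let summaries = [];
  async.whilst(
    () => summaries.length < minCount && !iterator.done(), // test runs before each pass
    (cb) => iterator.next((err, batch) => {
      if (err != null) { return cb(err); }
      if (batch.length === 0) { return cb(); } // empty pack; the test decides whether to continue
      summaries = summarize(batch, summaries);
      return cb();
    }),
    (err) => callback(err, summaries)
  );
}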
fetchUserInfo: (users, callback = (error, fetchedUserInfo) ->) ->
jobs = []
fetchedUserInfo = {}
for user_id of users
do (user_id) ->
jobs.push (callback) ->
WebApiManager.getUserInfo user_id, (error, userInfo) ->
return callback(error) if error?
fetchedUserInfo[user_id] = userInfo
callback()
fetchUserInfo(users, callback) {
if (callback == null) { callback = function(error, fetchedUserInfo) {}; }
const jobs = [];
const fetchedUserInfo = {};
for (let user_id in users) {
(user_id =>
jobs.push(callback =>
WebApiManager.getUserInfo(user_id, function(error, userInfo) {
if (error != null) { return callback(error); }
fetchedUserInfo[user_id] = userInfo;
return callback();
})
)
)(user_id);
}
async.series jobs, (err) ->
return callback(err) if err?
callback(null, fetchedUserInfo)
return async.series(jobs, function(err) {
if (err != null) { return callback(err); }
return callback(null, fetchedUserInfo);
});
},
fillUserInfo: (updates, callback = (error, updates) ->) ->
users = {}
for update in updates
user_id = update.meta.user_id
if UpdatesManager._validUserId(user_id)
users[user_id] = true
fillUserInfo(updates, callback) {
let update, user_id;
if (callback == null) { callback = function(error, updates) {}; }
const users = {};
for (update of Array.from(updates)) {
({ user_id } = update.meta);
if (UpdatesManager._validUserId(user_id)) {
users[user_id] = true;
}
}
UpdatesManager.fetchUserInfo users, (error, fetchedUserInfo) ->
return callback(error) if error?
for update in updates
user_id = update.meta.user_id
delete update.meta.user_id
if UpdatesManager._validUserId(user_id)
update.meta.user = fetchedUserInfo[user_id]
callback null, updates
return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
if (error != null) { return callback(error); }
for (update of Array.from(updates)) {
({ user_id } = update.meta);
delete update.meta.user_id;
if (UpdatesManager._validUserId(user_id)) {
update.meta.user = fetchedUserInfo[user_id];
}
}
return callback(null, updates);
});
},
fillSummarizedUserInfo: (updates, callback = (error, updates) ->) ->
users = {}
for update in updates
user_ids = update.meta.user_ids or []
for user_id in user_ids
if UpdatesManager._validUserId(user_id)
users[user_id] = true
fillSummarizedUserInfo(updates, callback) {
let update, user_id, user_ids;
if (callback == null) { callback = function(error, updates) {}; }
const users = {};
for (update of Array.from(updates)) {
user_ids = update.meta.user_ids || [];
for (user_id of Array.from(user_ids)) {
if (UpdatesManager._validUserId(user_id)) {
users[user_id] = true;
}
}
}
UpdatesManager.fetchUserInfo users, (error, fetchedUserInfo) ->
return callback(error) if error?
for update in updates
user_ids = update.meta.user_ids or []
update.meta.users = []
delete update.meta.user_ids
for user_id in user_ids
if UpdatesManager._validUserId(user_id)
update.meta.users.push fetchedUserInfo[user_id]
else
update.meta.users.push null
callback null, updates
return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
if (error != null) { return callback(error); }
for (update of Array.from(updates)) {
user_ids = update.meta.user_ids || [];
update.meta.users = [];
delete update.meta.user_ids;
for (user_id of Array.from(user_ids)) {
if (UpdatesManager._validUserId(user_id)) {
update.meta.users.push(fetchedUserInfo[user_id]);
} else {
update.meta.users.push(null);
}
}
}
return callback(null, updates);
});
},
_validUserId: (user_id) ->
if !user_id?
return false
else
return !!user_id.match(/^[a-f0-9]{24}$/)
_validUserId(user_id) {
if ((user_id == null)) {
return false;
} else {
return !!user_id.match(/^[a-f0-9]{24}$/);
}
},
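_validUserId above simply accepts 24-character lowercase hex strings, i.e. the string form of a Mongo ObjectId:

// What the /^[a-f0-9]{24}$/ check accepts and rejects.
console.log(/^[a-f0-9]{24}$/.test("507f1f77bcf86cd799439011")); // true  (ObjectId-like)
console.log(/^[a-f0-9]{24}$/.test("anonymous-user"));           // false
console.log(/^[a-f0-9]{24}$/.test(""));                         // false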
TIME_BETWEEN_DISTINCT_UPDATES: fiveMinutes = 5 * 60 * 1000
SPLIT_ON_DELETE_SIZE: 16 # characters
_summarizeUpdates: (updates, existingSummarizedUpdates = []) ->
summarizedUpdates = existingSummarizedUpdates.slice()
previousUpdateWasBigDelete = false
for update in updates
earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1]
shouldConcat = false
TIME_BETWEEN_DISTINCT_UPDATES: (fiveMinutes = 5 * 60 * 1000),
SPLIT_ON_DELETE_SIZE: 16, // characters
_summarizeUpdates(updates, existingSummarizedUpdates) {
if (existingSummarizedUpdates == null) { existingSummarizedUpdates = []; }
const summarizedUpdates = existingSummarizedUpdates.slice();
let previousUpdateWasBigDelete = false;
for (let update of Array.from(updates)) {
var doc_id;
const earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1];
let shouldConcat = false;
# If a user inserts some text, then deletes a big chunk including that text,
# the update we show might concat the insert and delete, and there will be no sign
# that the insert ever happened, nor any way to restore to it (restoring after a big delete is common).
# So, we split the summary on 'big' deletes. However, we're stepping backwards in time with
# the most recent changes considered first, so if this update is a big delete, we want to start
# a new summarized update next time, hence we monitor the previous update.
if previousUpdateWasBigDelete
shouldConcat = false
else if earliestUpdate and earliestUpdate.meta.end_ts - update.meta.start_ts < @TIME_BETWEEN_DISTINCT_UPDATES
# We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before
# the end of the current summarized block, so no block spans more than 5 minutes.
shouldConcat = true
// If a user inserts some text, then deletes a big chunk including that text,
// the update we show might concat the insert and delete, and there will be no sign
// that the insert ever happened, nor any way to restore to it (restoring after a big delete is common).
// So, we split the summary on 'big' deletes. However, we're stepping backwards in time with
// the most recent changes considered first, so if this update is a big delete, we want to start
// a new summarized update next time, hence we monitor the previous update.
if (previousUpdateWasBigDelete) {
shouldConcat = false;
} else if (earliestUpdate && ((earliestUpdate.meta.end_ts - update.meta.start_ts) < this.TIME_BETWEEN_DISTINCT_UPDATES)) {
// We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before
// the end of the current summarized block, so no block spans more than 5 minutes.
shouldConcat = true;
}
isBigDelete = false
for op in update.op or []
if op.d? and op.d.length > @SPLIT_ON_DELETE_SIZE
isBigDelete = true
let isBigDelete = false;
for (let op of Array.from(update.op || [])) {
if ((op.d != null) && (op.d.length > this.SPLIT_ON_DELETE_SIZE)) {
isBigDelete = true;
}
}
previousUpdateWasBigDelete = isBigDelete
previousUpdateWasBigDelete = isBigDelete;
if shouldConcat
# check if the user in this update is already present in the earliest update,
# if not, add them to the users list of the earliest update
earliestUpdate.meta.user_ids = _.union earliestUpdate.meta.user_ids, [update.meta.user_id]
if (shouldConcat) {
// check if the user in this update is already present in the earliest update,
// if not, add them to the users list of the earliest update
earliestUpdate.meta.user_ids = _.union(earliestUpdate.meta.user_ids, [update.meta.user_id]);
doc_id = update.doc_id.toString()
doc = earliestUpdate.docs[doc_id]
if doc?
doc.fromV = Math.min(doc.fromV, update.v)
doc.toV = Math.max(doc.toV, update.v)
else
earliestUpdate.docs[doc_id] =
fromV: update.v
doc_id = update.doc_id.toString();
const doc = earliestUpdate.docs[doc_id];
if (doc != null) {
doc.fromV = Math.min(doc.fromV, update.v);
doc.toV = Math.max(doc.toV, update.v);
} else {
earliestUpdate.docs[doc_id] = {
fromV: update.v,
toV: update.v
};
}
earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts)
earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts)
else
newUpdate =
meta:
user_ids: []
start_ts: update.meta.start_ts
earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts);
earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts);
} else {
const newUpdate = {
meta: {
user_ids: [],
start_ts: update.meta.start_ts,
end_ts: update.meta.end_ts
},
docs: {}
};
newUpdate.docs[update.doc_id.toString()] =
fromV: update.v
newUpdate.docs[update.doc_id.toString()] = {
fromV: update.v,
toV: update.v
newUpdate.meta.user_ids.push update.meta.user_id
summarizedUpdates.push newUpdate
};
newUpdate.meta.user_ids.push(update.meta.user_id);
summarizedUpdates.push(newUpdate);
}
}
return summarizedUpdates
return summarizedUpdates;
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
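To make the split-on-big-delete and five-minute-window rules in _summarizeUpdates concrete, here is a minimal invocation with fabricated updates (ids, versions and timestamps are made up; newest first, as the callers supply them). The first three updates fall within five minutes of each other and merge into one summary; because the third is a 'big' delete, the fourth starts a fresh summary even though it is also within five minutes:

// Worked example of the grouping; require path is illustrative.
const UpdatesManager = require("./UpdatesManager");

const min = 60 * 1000;
const user = "507f1f77bcf86cd799439011";
const updates = [
  { doc_id: "doc1", v: 8, op: [{ i: "hi", p: 0 }],
    meta: { user_id: user, start_ts: 10 * min, end_ts: 10 * min } },
  { doc_id: "doc1", v: 7, op: [{ i: "there", p: 2 }],
    meta: { user_id: user, start_ts: 7 * min, end_ts: 7 * min } },
  { doc_id: "doc1", v: 6, op: [{ d: "a fairly large deleted chunk", p: 0 }],
    meta: { user_id: user, start_ts: 6 * min, end_ts: 6 * min } },
  { doc_id: "doc1", v: 5, op: [{ i: "x", p: 0 }],
    meta: { user_id: user, start_ts: 5 * min, end_ts: 5 * min } }
];

const summaries = UpdatesManager._summarizeUpdates(updates);
console.log(summaries.length); // 2: versions 6-8 merge, version 5 is split off by the big delete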

View file

@ -1,69 +1,99 @@
request = require "requestretry" # allow retry on error https://github.com/FGRibreau/node-request-retry
logger = require "logger-sharelatex"
Settings = require "settings-sharelatex"
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let WebApiManager;
const request = require("requestretry"); // allow retry on error https://github.com/FGRibreau/node-request-retry
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");
# Don't let HTTP calls hang for a long time
MAX_HTTP_REQUEST_LENGTH = 15000 # 15 seconds
// Don't let HTTP calls hang for a long time
const MAX_HTTP_REQUEST_LENGTH = 15000; // 15 seconds
# DEPRECATED! This method of getting user details via track-changes is deprecated
# given the way we lay out our services.
# Instead, web should be responsible for collecting the raw data (user_ids) and
# filling it out with calls to other services. All API calls should create a
# tree-like structure as much as possible, with web as the root.
module.exports = WebApiManager =
sendRequest: (url, callback = (error, body) ->) ->
request.get {
url: "#{Settings.apis.web.url}#{url}"
timeout: MAX_HTTP_REQUEST_LENGTH
maxAttempts: 2 # for node-request-retry
auth:
user: Settings.apis.web.user
pass: Settings.apis.web.pass
// DEPRECATED! This method of getting user details via track-changes is deprecated
// given the way we lay out our services.
// Instead, web should be responsible for collecting the raw data (user_ids) and
// filling it out with calls to other services. All API calls should create a
// tree-like structure as much as possible, with web as the root.
module.exports = (WebApiManager = {
sendRequest(url, callback) {
if (callback == null) { callback = function(error, body) {}; }
return request.get({
url: `${Settings.apis.web.url}${url}`,
timeout: MAX_HTTP_REQUEST_LENGTH,
maxAttempts: 2, // for node-request-retry
auth: {
user: Settings.apis.web.user,
pass: Settings.apis.web.pass,
sendImmediately: true
}, (error, res, body)->
if error?
return callback(error)
if res.statusCode == 404
logger.log url: url, "got 404 from web api"
return callback null, null
if res.statusCode >= 200 and res.statusCode < 300
return callback null, body
else
error = new Error("web returned a non-success status code: #{res.statusCode} (attempts: #{res.attempts})")
callback error
}
}, function(error, res, body){
if (error != null) {
return callback(error);
}
if (res.statusCode === 404) {
logger.log({url}, "got 404 from web api");
return callback(null, null);
}
if ((res.statusCode >= 200) && (res.statusCode < 300)) {
return callback(null, body);
} else {
error = new Error(`web returned a non-success status code: ${res.statusCode} (attempts: ${res.attempts})`);
return callback(error);
}
});
},
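sendRequest above leans on requestretry, which accepts the usual request options plus retry controls; maxAttempts: 2 means at most one retry, and the default retry strategy covers network errors and 5xx responses. A stripped-down example of the same call shape against a placeholder URL and credentials:

// requestretry wraps request: same options object, plus maxAttempts and friends.
const request = require("requestretry");

request.get({
  url: "http://localhost:3000/status", // placeholder, not a real web endpoint
  timeout: 15000,                      // fail rather than hang
  maxAttempts: 2,                      // one retry on network/5xx errors
  auth: { user: "someuser", pass: "somepass", sendImmediately: true }
}, (error, res, body) => {
  if (error != null) { return console.error(error); }
  console.log(res.statusCode, res.attempts);
});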
getUserInfo: (user_id, callback = (error, userInfo) ->) ->
url = "/user/#{user_id}/personal_info"
logger.log user_id: user_id, "getting user info from web"
WebApiManager.sendRequest url, (error, body) ->
if error?
logger.error err: error, user_id: user_id, url: url, "error accessing web"
return callback error
if body == null
logger.error user_id: user_id, url: url, "no user found"
return callback null, null
try
user = JSON.parse(body)
catch error
return callback(error)
callback null, {
id: user.id
email: user.email
first_name: user.first_name
last_name: user.last_name
getUserInfo(user_id, callback) {
if (callback == null) { callback = function(error, userInfo) {}; }
const url = `/user/${user_id}/personal_info`;
logger.log({user_id}, "getting user info from web");
return WebApiManager.sendRequest(url, function(error, body) {
let user;
if (error != null) {
logger.error({err: error, user_id, url}, "error accessing web");
return callback(error);
}
getProjectDetails: (project_id, callback = (error, details) ->) ->
url = "/project/#{project_id}/details"
logger.log project_id: project_id, "getting project details from web"
WebApiManager.sendRequest url, (error, body) ->
if error?
logger.error err: error, project_id: project_id, url: url, "error accessing web"
return callback error
if (body === null) {
logger.error({user_id, url}, "no user found");
return callback(null, null);
}
try {
user = JSON.parse(body);
} catch (error1) {
error = error1;
return callback(error);
}
return callback(null, {
id: user.id,
email: user.email,
first_name: user.first_name,
last_name: user.last_name
});
});
},
try
project = JSON.parse(body)
catch error
return callback(error)
callback null, project
getProjectDetails(project_id, callback) {
if (callback == null) { callback = function(error, details) {}; }
const url = `/project/${project_id}/details`;
logger.log({project_id}, "getting project details from web");
return WebApiManager.sendRequest(url, function(error, body) {
let project;
if (error != null) {
logger.error({err: error, project_id, url}, "error accessing web");
return callback(error);
}
try {
project = JSON.parse(body);
} catch (error1) {
error = error1;
return callback(error);
}
return callback(null, project);
});
}
});

View file

@ -1,9 +1,10 @@
Settings = require "settings-sharelatex"
mongojs = require "mongojs"
bson = require "bson"
db = mongojs(Settings.mongo.url, ["docHistory", "projectHistoryMetaData", "docHistoryIndex"])
module.exports =
db: db
ObjectId: mongojs.ObjectId
const Settings = require("settings-sharelatex");
const mongojs = require("mongojs");
const bson = require("bson");
const db = mongojs(Settings.mongo.url, ["docHistory", "projectHistoryMetaData", "docHistoryIndex"]);
module.exports = {
db,
ObjectId: mongojs.ObjectId,
BSON: new bson.BSONPure()
};
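For completeness, the mongojs handle exported above is used elsewhere with the familiar callback-style collection API; a hedged one-liner against the docHistory collection (the require path and field name are illustrative):

// Query one of the collections registered above.
const { db, ObjectId } = require("./mongojs"); // path is illustrative
db.docHistory.find({ doc_id: ObjectId("507f1f77bcf86cd799439011") }, (error, packs) => {
  if (error != null) { throw error; }
  console.log(`found ${packs.length} history entries`);
});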