decaffeinate: Convert DiffGenerator.coffee and 17 other files to JS

decaffeinate 2020-02-17 18:34:04 +01:00 committed by mserranom
parent a971c5895b
commit 57345632e0
18 changed files with 2834 additions and 2000 deletions

View file

@@ -1,227 +1,293 @@
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DiffGenerator;
var ConsistencyError = function(message) {
  const error = new Error(message);
  error.name = "ConsistencyError";
  error.__proto__ = ConsistencyError.prototype;
  return error;
};
ConsistencyError.prototype.__proto__ = Error.prototype;

const logger = require("logger-sharelatex");

module.exports = (DiffGenerator = {
  ConsistencyError,

  rewindUpdate(content, update) {
    for (let j = update.op.length - 1, i = j; j >= 0; j--, i = j) {
      const op = update.op[i];
      if (op.broken !== true) {
        try {
          content = DiffGenerator.rewindOp(content, op);
        } catch (e) {
          if (e instanceof ConsistencyError && (i = update.op.length - 1)) {
            // catch known case where the last op in an array has been
            // merged into a later op
            logger.error({err: e, update, op: JSON.stringify(op)}, "marking op as broken");
            op.broken = true;
          } else {
            throw e; // rethrow the execption
          }
        }
      }
    }
    return content;
  },

  rewindOp(content, op) {
    let p;
    if (op.i != null) {
      // ShareJS will accept an op where p > content.length when applied,
      // and it applies as though p == content.length. However, the op is
      // passed to us with the original p > content.length. Detect if that
      // is the case with this op, and shift p back appropriately to match
      // ShareJS if so.
      ({ p } = op);
      const max_p = content.length - op.i.length;
      if (p > max_p) {
        logger.warn({max_p, p}, "truncating position to content length");
        p = max_p;
      }

      const textToBeRemoved = content.slice(p, p + op.i.length);
      if (op.i !== textToBeRemoved) {
        throw new ConsistencyError(
          `Inserted content, '${op.i}', does not match text to be removed, '${textToBeRemoved}'`
        );
      }

      return content.slice(0, p) + content.slice(p + op.i.length);

    } else if (op.d != null) {
      return content.slice(0, op.p) + op.d + content.slice(op.p);

    } else {
      return content;
    }
  },

  rewindUpdates(content, updates) {
    for (let update of Array.from(updates.reverse())) {
      try {
        content = DiffGenerator.rewindUpdate(content, update);
      } catch (e) {
        e.attempted_update = update; // keep a record of the attempted update
        throw e; // rethrow the exception
      }
    }
    return content;
  },

  buildDiff(initialContent, updates) {
    let diff = [ {u: initialContent} ];
    for (let update of Array.from(updates)) {
      diff = DiffGenerator.applyUpdateToDiff(diff, update);
    }
    diff = DiffGenerator.compressDiff(diff);
    return diff;
  },

  compressDiff(diff) {
    const newDiff = [];
    for (let part of Array.from(diff)) {
      const lastPart = newDiff[newDiff.length - 1];
      if ((lastPart != null) && ((lastPart.meta != null ? lastPart.meta.user : undefined) != null) && ((part.meta != null ? part.meta.user : undefined) != null)) {
        if ((lastPart.i != null) && (part.i != null) && (lastPart.meta.user.id === part.meta.user.id)) {
          lastPart.i += part.i;
          lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
          lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
        } else if ((lastPart.d != null) && (part.d != null) && (lastPart.meta.user.id === part.meta.user.id)) {
          lastPart.d += part.d;
          lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
          lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
        } else {
          newDiff.push(part);
        }
      } else {
        newDiff.push(part);
      }
    }
    return newDiff;
  },

  applyOpToDiff(diff, op, meta) {
    let consumedDiff;
    const position = 0;

    let remainingDiff = diff.slice();
    ({consumedDiff, remainingDiff} = DiffGenerator._consumeToOffset(remainingDiff, op.p));
    const newDiff = consumedDiff;

    if (op.i != null) {
      newDiff.push({
        i: op.i,
        meta
      });
    } else if (op.d != null) {
      ({consumedDiff, remainingDiff} = DiffGenerator._consumeDiffAffectedByDeleteOp(remainingDiff, op, meta));
      newDiff.push(...Array.from(consumedDiff || []));
    }

    newDiff.push(...Array.from(remainingDiff || []));

    return newDiff;
  },

  applyUpdateToDiff(diff, update) {
    for (let op of Array.from(update.op)) {
      if (op.broken !== true) {
        diff = DiffGenerator.applyOpToDiff(diff, op, update.meta);
      }
    }
    return diff;
  },

  _consumeToOffset(remainingDiff, totalOffset) {
    let part;
    const consumedDiff = [];
    let position = 0;
    while ((part = remainingDiff.shift())) {
      const length = DiffGenerator._getLengthOfDiffPart(part);
      if (part.d != null) {
        consumedDiff.push(part);
      } else if ((position + length) >= totalOffset) {
        const partOffset = totalOffset - position;
        if (partOffset > 0) {
          consumedDiff.push(DiffGenerator._slicePart(part, 0, partOffset));
        }
        if (partOffset < length) {
          remainingDiff.unshift(DiffGenerator._slicePart(part, partOffset));
        }
        break;
      } else {
        position += length;
        consumedDiff.push(part);
      }
    }

    return {
      consumedDiff,
      remainingDiff
    };
  },

  _consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
    const consumedDiff = [];
    let remainingOp = deleteOp;
    while (remainingOp && (remainingDiff.length > 0)) {
      let newPart;
      ({newPart, remainingDiff, remainingOp} = DiffGenerator._consumeDeletedPart(remainingDiff, remainingOp, meta));
      if (newPart != null) { consumedDiff.push(newPart); }
    }
    return {
      consumedDiff,
      remainingDiff
    };
  },

  _consumeDeletedPart(remainingDiff, op, meta) {
    let deletedContent, newPart, remainingOp;
    const part = remainingDiff.shift();
    const partLength = DiffGenerator._getLengthOfDiffPart(part);

    if (part.d != null) {
      // Skip existing deletes
      remainingOp = op;
      newPart = part;

    } else if (partLength > op.d.length) {
      // Only the first bit of the part has been deleted
      const remainingPart = DiffGenerator._slicePart(part, op.d.length);
      remainingDiff.unshift(remainingPart);

      deletedContent = DiffGenerator._getContentOfPart(part).slice(0, op.d.length);
      if (deletedContent !== op.d) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
      }

      if (part.u != null) {
        newPart = {
          d: op.d,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp = null;

    } else if (partLength === op.d.length) {
      // The entire part has been deleted, but it is the last part

      deletedContent = DiffGenerator._getContentOfPart(part);
      if (deletedContent !== op.d) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
      }

      if (part.u != null) {
        newPart = {
          d: op.d,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp = null;

    } else if (partLength < op.d.length) {
      // The entire part has been deleted and there is more

      deletedContent = DiffGenerator._getContentOfPart(part);
      const opContent = op.d.slice(0, deletedContent.length);
      if (deletedContent !== opContent) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`);
      }

      if (part.u) {
        newPart = {
          d: part.u,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp =
        {p: op.p, d: op.d.slice(DiffGenerator._getLengthOfDiffPart(part))};
    }

    return {
      newPart,
      remainingDiff,
      remainingOp
    };
  },

  _slicePart(basePart, from, to) {
    let part;
    if (basePart.u != null) {
      part = { u: basePart.u.slice(from, to) };
    } else if (basePart.i != null) {
      part = { i: basePart.i.slice(from, to) };
    }
    if (basePart.meta != null) {
      part.meta = basePart.meta;
    }
    return part;
  },

  _getLengthOfDiffPart(part) {
    return (part.u || part.d || part.i || '').length;
  },

  _getContentOfPart(part) {
    return part.u || part.d || part.i || '';
  }
});
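
As an illustrative aside (not part of the changed files), here is a minimal sketch of how the converted DiffGenerator API could be exercised. The op and meta shapes are inferred from the code above; the values and ids are made up for the example.

  const DiffGenerator = require("./DiffGenerator");

  // A single update that inserts "world " at position 6 of "Hello there".
  const update = {
    op: [{ i: "world ", p: 6 }],
    meta: { user: { id: "user-1" }, start_ts: 1000, end_ts: 2000 }
  };

  // buildDiff applies the updates to the initial content and returns diff
  // parts: unchanged text ({u}), insertions ({i}) and deletions ({d}).
  const diff = DiffGenerator.buildDiff("Hello there", [update]);
  // => [ { u: "Hello " }, { i: "world ", meta: {...} }, { u: "there" } ]

  // rewindUpdates walks the updates backwards to recover the earlier
  // content from the later one.
  const earlier = DiffGenerator.rewindUpdates("Hello world there", [update]);
  // => "Hello there"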

View file

@@ -1,88 +1,128 @@
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DiffManager;
const UpdatesManager = require("./UpdatesManager");
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffGenerator = require("./DiffGenerator");
const logger = require("logger-sharelatex");

module.exports = (DiffManager = {
  getLatestDocAndUpdates(project_id, doc_id, fromVersion, callback) {
    // Get updates last, since then they must be ahead and it
    // might be possible to rewind to the same version as the doc.
    if (callback == null) { callback = function(error, content, version, updates) {}; }
    return DocumentUpdaterManager.getDocument(project_id, doc_id, function(error, content, version) {
      if (error != null) { return callback(error); }
      if ((fromVersion == null)) { // If we haven't been given a version, just return lastest doc and no updates
        return callback(null, content, version, []);
      }
      return UpdatesManager.getDocUpdatesWithUserInfo(project_id, doc_id, {from: fromVersion}, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, content, version, updates);
      });
    });
  },

  getDiff(project_id, doc_id, fromVersion, toVersion, callback) {
    if (callback == null) { callback = function(error, diff) {}; }
    return DiffManager.getDocumentBeforeVersion(project_id, doc_id, fromVersion, function(error, startingContent, updates) {
      let diff;
      if (error != null) {
        if (error.message === "broken-history") {
          return callback(null, "history unavailable");
        } else {
          return callback(error);
        }
      }

      const updatesToApply = [];
      for (let update of Array.from(updates.slice().reverse())) {
        if (update.v <= toVersion) {
          updatesToApply.push(update);
        }
      }

      try {
        diff = DiffGenerator.buildDiff(startingContent, updatesToApply);
      } catch (e) {
        return callback(e);
      }

      return callback(null, diff);
    });
  },

  getDocumentBeforeVersion(project_id, doc_id, version, _callback) {
    // Whichever order we get the latest document and the latest updates,
    // there is potential for updates to be applied between them so that
    // they do not return the same 'latest' versions.
    // If this happens, we just retry and hopefully get them at the compatible
    // versions.
    let retry;
    if (_callback == null) { _callback = function(error, document, rewoundUpdates) {}; }
    let retries = 3;
    const callback = function(error, ...args) {
      if (error != null) {
        if (error.retry && (retries > 0)) {
          logger.warn({error, project_id, doc_id, version, retries}, "retrying getDocumentBeforeVersion");
          return retry();
        } else {
          return _callback(error);
        }
      } else {
        return _callback(null, ...Array.from(args));
      }
    };

    return (retry = function() {
      retries--;
      return DiffManager._tryGetDocumentBeforeVersion(project_id, doc_id, version, callback);
    })();
  },

  _tryGetDocumentBeforeVersion(project_id, doc_id, version, callback) {
    if (callback == null) { callback = function(error, document, rewoundUpdates) {}; }
    logger.log({project_id, doc_id, version}, "getting document before version");
    return DiffManager.getLatestDocAndUpdates(project_id, doc_id, version, function(error, content, version, updates) {
      let startingContent;
      if (error != null) { return callback(error); }

      // bail out if we hit a broken update
      for (let u of Array.from(updates)) {
        if (u.broken) {
          return callback(new Error("broken-history"));
        }
      }

      // discard any updates which are ahead of this document version
      while ((updates[0] != null ? updates[0].v : undefined) >= version) {
        updates.shift();
      }

      const lastUpdate = updates[0];
      if ((lastUpdate != null) && (lastUpdate.v !== (version - 1))) {
        error = new Error(`latest update version, ${lastUpdate.v}, does not match doc version, ${version}`);
        error.retry = true;
        return callback(error);
      }

      logger.log({docVersion: version, lastUpdateVersion: (lastUpdate != null ? lastUpdate.v : undefined), updateCount: updates.length}, "rewinding updates");

      const tryUpdates = updates.slice().reverse();

      try {
        startingContent = DiffGenerator.rewindUpdates(content, tryUpdates);
        // tryUpdates is reversed, and any unapplied ops are marked as broken
      } catch (e) {
        return callback(e);
      }

      return callback(null, startingContent, tryUpdates);
    });
  }
});

View file

@@ -1,42 +1,63 @@
/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DocumentUpdaterManager;
const request = require("request");
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");

module.exports = (DocumentUpdaterManager = {
  getDocument(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error, content, version) {}; }
    const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
    logger.log({project_id, doc_id}, "getting doc from document updater");
    return request.get(url, function(error, res, body){
      if (error != null) {
        return callback(error);
      }
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        try {
          body = JSON.parse(body);
        } catch (error1) {
          error = error1;
          return callback(error);
        }
        logger.log({project_id, doc_id, version: body.version}, "got doc from document updater");
        return callback(null, body.lines.join("\n"), body.version);
      } else {
        error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
        logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
        return callback(error);
      }
    });
  },

  setDocument(project_id, doc_id, content, user_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
    logger.log({project_id, doc_id}, "setting doc in document updater");
    return request.post({
      url,
      json: {
        lines: content.split("\n"),
        source: "restore",
        user_id,
        undoing: true
      }
    }, function(error, res, body){
      if (error != null) {
        return callback(error);
      }
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        return callback(null);
      } else {
        error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
        logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
        return callback(error);
      }
    });
  }
});
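
A brief usage sketch (again, not part of the changed files): the converted DocumentUpdaterManager keeps its callback-style API and returns the document as a single newline-joined string plus its version. The ids below are hypothetical placeholders.

  const DocumentUpdaterManager = require("./DocumentUpdaterManager");

  // "project-id" and "doc-id" are illustrative values only.
  DocumentUpdaterManager.getDocument("project-id", "doc-id", function(error, content, version) {
    if (error != null) { return console.error(error); }
    // content is the document's lines joined with "\n"
    console.log(`doc is at version ${version} with ${content.split("\n").length} lines`);
  });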

View file

@@ -1,46 +1,64 @@
/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
const { ObjectId } = require("mongojs");
const request = require("request");
const async = require("async");
const settings = require("settings-sharelatex");
const { port } = settings.internal.trackchanges;
const logger = require("logger-sharelatex");
const LockManager = require("./LockManager");

module.exports = {
  check(callback){
    const project_id = ObjectId(settings.trackchanges.healthCheck.project_id);
    const url = `http://localhost:${port}/project/${project_id}`;
    logger.log({project_id}, "running health check");
    const jobs = [
      cb=>
        request.get({url:`http://localhost:${port}/check_lock`, timeout:3000}, function(err, res, body) {
          if (err != null) {
            logger.err({err, project_id}, "error checking lock for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 200) {
            return cb(`status code not 200, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
      ,
      cb=>
        request.post({url:`${url}/flush`, timeout:10000}, function(err, res, body) {
          if (err != null) {
            logger.err({err, project_id}, "error flushing for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 204) {
            return cb(`status code not 204, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
      ,
      cb=>
        request.get({url:`${url}/updates`, timeout:10000}, function(err, res, body){
          if (err != null) {
            logger.err({err, project_id}, "error getting updates for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 200) {
            return cb(`status code not 200, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
    ];
    return async.series(jobs, callback);
  },
  checkLock(callback) {
    return LockManager.healthCheck(callback);
  }
};

View file

@@ -1,137 +1,195 @@
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let HttpController;
const UpdatesManager = require("./UpdatesManager");
const DiffManager = require("./DiffManager");
const PackManager = require("./PackManager");
const RestoreManager = require("./RestoreManager");
const logger = require("logger-sharelatex");
const HealthChecker = require("./HealthChecker");
const _ = require("underscore");

module.exports = (HttpController = {
  flushDoc(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;
    logger.log({project_id, doc_id}, "compressing doc history");
    return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  flushProject(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    logger.log({project_id}, "compressing project history");
    return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  flushAll(req, res, next) {
    // limit on projects to flush or -1 for all (default)
    if (next == null) { next = function(error) {}; }
    const limit = (req.query.limit != null) ? parseInt(req.query.limit, 10) : -1;
    logger.log({limit}, "flushing all projects");
    return UpdatesManager.flushAll(limit, function(error, result) {
      if (error != null) { return next(error); }
      const {failed, succeeded, all} = result;
      const status = `${succeeded.length} succeeded, ${failed.length} failed`;
      if (limit === 0) {
        return res.status(200).send(`${status}\nwould flush:\n${all.join('\n')}\n`);
      } else if (failed.length > 0) {
        logger.log({failed, succeeded}, "error flushing projects");
        return res.status(500).send(`${status}\nfailed to flush:\n${failed.join('\n')}\n`);
      } else {
        return res.status(200).send(`${status}\nflushed ${succeeded.length} projects of ${all.length}\n`);
      }
    });
  },

  checkDanglingUpdates(req, res, next) {
    if (next == null) { next = function(error) {}; }
    logger.log("checking dangling updates");
    return UpdatesManager.getDanglingUpdates(function(error, result) {
      if (error != null) { return next(error); }
      if (result.length > 0) {
        logger.log({dangling: result}, "found dangling updates");
        return res.status(500).send(`dangling updates:\n${result.join('\n')}\n`);
      } else {
        return res.status(200).send("no dangling updates found\n");
      }
    });
  },

  checkDoc(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;
    logger.log({project_id, doc_id}, "checking doc history");
    return DiffManager.getDocumentBeforeVersion(project_id, doc_id, 1, function(error, document, rewoundUpdates) {
      if (error != null) { return next(error); }
      const broken = [];
      for (let update of Array.from(rewoundUpdates)) {
        for (let op of Array.from(update.op)) {
          if (op.broken === true) {
            broken.push(op);
          }
        }
      }
      if (broken.length > 0) {
        return res.send(broken);
      } else {
        return res.send(204);
      }
    });
  },

  getDiff(req, res, next) {
    let from, to;
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;

    if (req.query.from != null) {
      from = parseInt(req.query.from, 10);
    } else {
      from = null;
    }
    if (req.query.to != null) {
      to = parseInt(req.query.to, 10);
    } else {
      to = null;
    }

    logger.log({project_id, doc_id, from, to}, "getting diff");
    return DiffManager.getDiff(project_id, doc_id, from, to, function(error, diff) {
      if (error != null) { return next(error); }
      return res.json({diff});
    });
  },

  getUpdates(req, res, next) {
    let before, min_count;
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;

    if (req.query.before != null) {
      before = parseInt(req.query.before, 10);
    }
    if (req.query.min_count != null) {
      min_count = parseInt(req.query.min_count, 10);
    }

    return UpdatesManager.getSummarizedProjectUpdates(project_id, {before, min_count}, function(error, updates, nextBeforeTimestamp) {
      if (error != null) { return next(error); }
      return res.json({
        updates,
        nextBeforeTimestamp
      });
    });
  },

  restore(req, res, next) {
    if (next == null) { next = function(error) {}; }
    let {doc_id, project_id, version} = req.params;
    const user_id = req.headers["x-user-id"];
    version = parseInt(version, 10);
    return RestoreManager.restoreToBeforeVersion(project_id, doc_id, version, user_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  pushDocHistory(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    const { doc_id } = req.params;
    logger.log({project_id, doc_id}, "pushing all finalised changes to s3");
    return PackManager.pushOldPacks(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  pullDocHistory(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    const { doc_id } = req.params;
    logger.log({project_id, doc_id}, "pulling all packs from s3");
    return PackManager.pullOldPacks(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  healthCheck(req, res){
    return HealthChecker.check(function(err){
      if (err != null) {
        logger.err({err}, "error performing health check");
        return res.send(500);
      } else {
        return res.send(200);
      }
    });
  },

  checkLock(req, res){
    return HealthChecker.checkLock(function(err) {
      if (err != null) {
        logger.err({err}, "error performing lock check");
        return res.send(500);
      } else {
        return res.send(200);
      }
    });
  }
});

View file

@@ -1,85 +1,119 @@
/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let LockManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.lock);
const os = require("os");
const crypto = require("crypto");
const logger = require("logger-sharelatex");

const HOST = os.hostname();
const PID = process.pid;
const RND = crypto.randomBytes(4).toString('hex');
let COUNT = 0;

module.exports = (LockManager = {
  LOCK_TEST_INTERVAL: 50, // 50ms between each test of the lock
  MAX_LOCK_WAIT_TIME: 10000, // 10s maximum time to spend trying to get the lock
  LOCK_TTL: 300, // seconds (allow 5 minutes for any operation to complete)

  // Use a signed lock value as described in
  // http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
  // to prevent accidental unlocking by multiple processes
  randomLock() {
    const time = Date.now();
    return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`;
  },

  unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end',

  tryLock(key, callback) {
    if (callback == null) { callback = function(err, gotLock) {}; }
    const lockValue = LockManager.randomLock();
    return rclient.set(key, lockValue, "EX", this.LOCK_TTL, "NX", function(err, gotLock){
      if (err != null) { return callback(err); }
      if (gotLock === "OK") {
        return callback(err, true, lockValue);
      } else {
        return callback(err, false);
      }
    });
  },

  getLock(key, callback) {
    let attempt;
    if (callback == null) { callback = function(error) {}; }
    const startTime = Date.now();
    return (attempt = function() {
      if ((Date.now() - startTime) > LockManager.MAX_LOCK_WAIT_TIME) {
        const e = new Error("Timeout");
        e.key = key;
        return callback(e);
      }

      return LockManager.tryLock(key, function(error, gotLock, lockValue) {
        if (error != null) { return callback(error); }
        if (gotLock) {
          return callback(null, lockValue);
        } else {
          return setTimeout(attempt, LockManager.LOCK_TEST_INTERVAL);
        }
      });
    })();
  },

  checkLock(key, callback) {
    if (callback == null) { callback = function(err, isFree) {}; }
    return rclient.exists(key, function(err, exists) {
      if (err != null) { return callback(err); }
      exists = parseInt(exists);
      if (exists === 1) {
        return callback(err, false);
      } else {
        return callback(err, true);
      }
    });
  },

  releaseLock(key, lockValue, callback) {
    return rclient.eval(LockManager.unlockScript, 1, key, lockValue, function(err, result) {
      if (err != null) {
        return callback(err);
      }
      if ((result != null) && (result !== 1)) { // successful unlock should release exactly one key
        logger.error({key, lockValue, redis_err:err, redis_result:result}, "unlocking error");
        return callback(new Error("tried to release timed out lock"));
      }
      return callback(err,result);
    });
  },

  runWithLock(key, runner, callback) {
    if (callback == null) { callback = function(error) {}; }
    return LockManager.getLock(key, function(error, lockValue) {
      if (error != null) { return callback(error); }
      return runner(error1 =>
        LockManager.releaseLock(key, lockValue, function(error2) {
          error = error1 || error2;
          if (error != null) { return callback(error); }
          return callback();
        })
      );
    });
  },

  healthCheck(callback) {
    const action = releaseLock => releaseLock();
    return LockManager.runWithLock(`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`, action, callback);
  },

  close(callback) {
    rclient.quit();
    return rclient.once('end', callback);
  }
});
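
A minimal sketch (not part of the changed files) of the locking pattern this module implements: runWithLock acquires the key with a signed lock value, hands the caller a release function, and only deletes the key if the stored value still matches. The key below is hypothetical.

  const LockManager = require("./LockManager");

  LockManager.runWithLock("HistoryLock:example-key", function(releaseLock) {
    // ... critical section: do work while holding the lock ...
    releaseLock(); // passing an error here propagates it to the final callback
  }, function(error) {
    if (error != null) { return console.error("lock or job failed", error); }
    console.log("done, lock released");
  });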

View file

@@ -1,118 +1,141 @@
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let MongoAWS;
const settings = require("settings-sharelatex");
const logger = require("logger-sharelatex");
const AWS = require('aws-sdk');
const S3S = require('s3-streams');
const {db, ObjectId} = require("./mongojs");
const JSONStream = require("JSONStream");
const ReadlineStream = require("byline");
const zlib = require("zlib");
const Metrics = require("metrics-sharelatex");

const DAYS = 24 * 3600 * 1000; // one day in milliseconds

const createStream = function(streamConstructor, project_id, doc_id, pack_id) {
  const AWS_CONFIG = {
    accessKeyId: settings.trackchanges.s3.key,
    secretAccessKey: settings.trackchanges.s3.secret,
    endpoint: settings.trackchanges.s3.endpoint,
    s3ForcePathStyle: settings.trackchanges.s3.pathStyle
  };

  return streamConstructor(new AWS.S3(AWS_CONFIG), {
    "Bucket": settings.trackchanges.stores.doc_history,
    "Key": project_id+"/changes-"+doc_id+"/pack-"+pack_id
  });
};

module.exports = (MongoAWS = {

  archivePack(project_id, doc_id, pack_id, _callback) {
    if (_callback == null) { _callback = function(error) {}; }
    const callback = function(...args) {
      _callback(...Array.from(args || []));
      return _callback = function() {};
    };

    const query = {
      _id: ObjectId(pack_id),
      doc_id: ObjectId(doc_id)
    };

    if ((project_id == null)) { return callback(new Error("invalid project id")); }
    if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
    if ((pack_id == null)) { return callback(new Error("invalid pack id")); }

    logger.log({project_id, doc_id, pack_id}, "uploading data to s3");

    const upload = createStream(S3S.WriteStream, project_id, doc_id, pack_id);

    return db.docHistory.findOne(query, function(err, result) {
      if (err != null) { return callback(err); }
      if ((result == null)) { return callback(new Error("cannot find pack to send to s3")); }
      if (result.expiresAt != null) { return callback(new Error("refusing to send pack with TTL to s3")); }
      const uncompressedData = JSON.stringify(result);
      if (uncompressedData.indexOf("\u0000") !== -1) {
        const error = new Error("null bytes found in upload");
        logger.error({err: error, project_id, doc_id, pack_id}, error.message);
        return callback(error);
      }
      return zlib.gzip(uncompressedData, function(err, buf) {
        logger.log({project_id, doc_id, pack_id, origSize: uncompressedData.length, newSize: buf.length}, "compressed pack");
        if (err != null) { return callback(err); }
        upload.on('error', err => callback(err));
        upload.on('finish', function() {
          Metrics.inc("archive-pack");
          logger.log({project_id, doc_id, pack_id}, "upload to s3 completed");
          return callback(null);
        });
        upload.write(buf);
        return upload.end();
      });
    });
  },

  readArchivedPack(project_id, doc_id, pack_id, _callback) {
    if (_callback == null) { _callback = function(error, result) {}; }
    const callback = function(...args) {
      _callback(...Array.from(args || []));
      return _callback = function() {};
    };

    if ((project_id == null)) { return callback(new Error("invalid project id")); }
    if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
    if ((pack_id == null)) { return callback(new Error("invalid pack id")); }

    logger.log({project_id, doc_id, pack_id}, "downloading data from s3");

    const download = createStream(S3S.ReadStream, project_id, doc_id, pack_id);

    const inputStream = download
      .on('open', obj => 1).on('error', err => callback(err));

    const gunzip = zlib.createGunzip();
    gunzip.setEncoding('utf8');
    gunzip.on('error', function(err) {
      logger.log({project_id, doc_id, pack_id, err}, "error uncompressing gzip stream");
      return callback(err);
    });

    const outputStream = inputStream.pipe(gunzip);
    const parts = [];
    outputStream.on('error', err => callback(err));
    outputStream.on('end', function() {
      let object;
      logger.log({project_id, doc_id, pack_id}, "download from s3 completed");
      try {
        object = JSON.parse(parts.join(''));
      } catch (e) {
        return callback(e);
      }
      object._id = ObjectId(object._id);
      object.doc_id = ObjectId(object.doc_id);
      object.project_id = ObjectId(object.project_id);
      for (let op of Array.from(object.pack)) {
        if (op._id != null) { op._id = ObjectId(op._id); }
      }
      return callback(null, object);
    });
    return outputStream.on('data', data => parts.push(data));
  },

  unArchivePack(project_id, doc_id, pack_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return MongoAWS.readArchivedPack(project_id, doc_id, pack_id, function(err, object) {
      if (err != null) { return callback(err); }
      Metrics.inc("unarchive-pack");
      // allow the object to expire, we can always retrieve it again
      object.expiresAt = new Date(Date.now() + (7 * DAYS));
      logger.log({project_id, doc_id, pack_id}, "inserting object from s3");
      return db.docHistory.insert(object, callback);
    });
  }
});

View file

@@ -1,104 +1,131 @@
/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let MongoManager;
const {db, ObjectId} = require("./mongojs");
const PackManager = require("./PackManager");
const async = require("async");
const _ = require("underscore");
const metrics = require('metrics-sharelatex');
const logger = require('logger-sharelatex');

module.exports = (MongoManager = {
  getLastCompressedUpdate(doc_id, callback) {
    if (callback == null) { callback = function(error, update) {}; }
    return db.docHistory
      .find({doc_id: ObjectId(doc_id.toString())}, {pack: {$slice:-1}}) // only return the last entry in a pack
      .sort({ v: -1 })
      .limit(1)
      .toArray(function(error, compressedUpdates) {
        if (error != null) { return callback(error); }
        return callback(null, compressedUpdates[0] || null);
      });
  },

  peekLastCompressedUpdate(doc_id, callback) {
    // under normal use we pass back the last update as
    // callback(null,update,version).
    //
    // when we have an existing last update but want to force a new one
    // to start, we pass it back as callback(null,null,version), just
    // giving the version so we can check consistency.
    if (callback == null) { callback = function(error, update, version) {}; }
    return MongoManager.getLastCompressedUpdate(doc_id, function(error, update) {
      if (error != null) { return callback(error); }
      if (update != null) {
        if (update.broken) { // marked as broken so we will force a new op
          return callback(null, null);
        } else if (update.pack != null) {
          if (update.finalised) { // no more ops can be appended
            return callback(null, null, update.pack[0] != null ? update.pack[0].v : undefined);
          } else {
            return callback(null, update, update.pack[0] != null ? update.pack[0].v : undefined);
          }
        } else {
          return callback(null, update, update.v);
        }
      } else {
        return PackManager.getLastPackFromIndex(doc_id, function(error, pack) {
          if (error != null) { return callback(error); }
          if (((pack != null ? pack.inS3 : undefined) != null) && ((pack != null ? pack.v_end : undefined) != null)) { return callback(null, null, pack.v_end); }
          return callback(null, null);
        });
      }
    });
  },

  backportProjectId(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return db.docHistory.update({
      doc_id: ObjectId(doc_id.toString()),
      project_id: { $exists: false }
    }, {
      $set: { project_id: ObjectId(project_id.toString()) }
    }, {
      multi: true
    }, callback);
  },

  getProjectMetaData(project_id, callback) {
    if (callback == null) { callback = function(error, metadata) {}; }
    return db.projectHistoryMetaData.find({
      project_id: ObjectId(project_id.toString())
    }, function(error, results) {
      if (error != null) { return callback(error); }
      return callback(null, results[0]);
    });
  },

  setProjectMetaData(project_id, metadata, callback) {
    if (callback == null) { callback = function(error) {}; }
    return db.projectHistoryMetaData.update({
      project_id: ObjectId(project_id)
    }, {
      $set: metadata
    }, {
      upsert: true
    }, callback);
  },

  upgradeHistory(project_id, callback) {
    // preserve the project's existing history
    if (callback == null) { callback = function(error) {}; }
    return db.docHistory.update({
      project_id: ObjectId(project_id),
      temporary: true,
      expiresAt: {$exists: true}
    }, {
      $set: {temporary: false},
      $unset: {expiresAt: ""}
    }, {
      multi: true
    }, callback);
  },

  ensureIndices() {
    // For finding all updates that go into a diff for a doc
    db.docHistory.ensureIndex({ doc_id: 1, v: 1 }, { background: true });
    // For finding all updates that affect a project
    db.docHistory.ensureIndex({ project_id: 1, "meta.end_ts": 1 }, { background: true });
# For finding updates that don't yet have a project_id and need it inserting // For finding updates that don't yet have a project_id and need it inserting
db.docHistory.ensureIndex { doc_id: 1, project_id: 1 }, { background: true } db.docHistory.ensureIndex({ doc_id: 1, project_id: 1 }, { background: true });
# For finding project meta-data // For finding project meta-data
db.projectHistoryMetaData.ensureIndex { project_id: 1 }, { background: true } db.projectHistoryMetaData.ensureIndex({ project_id: 1 }, { background: true });
# TTL index for auto deleting week old temporary ops // TTL index for auto deleting week old temporary ops
db.docHistory.ensureIndex { expiresAt: 1 }, { expireAfterSeconds: 0, background: true } db.docHistory.ensureIndex({ expiresAt: 1 }, { expireAfterSeconds: 0, background: true });
# For finding packs to be checked for archiving // For finding packs to be checked for archiving
db.docHistory.ensureIndex { last_checked: 1 }, { background: true } db.docHistory.ensureIndex({ last_checked: 1 }, { background: true });
# For finding archived packs // For finding archived packs
db.docHistoryIndex.ensureIndex { project_id: 1 }, { background: true } return db.docHistoryIndex.ensureIndex({ project_id: 1 }, { background: true });
}
});
[ [
'getLastCompressedUpdate', 'getLastCompressedUpdate',
'getProjectMetaData', 'getProjectMetaData',
'setProjectMetaData' 'setProjectMetaData'
].map (method) -> ].map(method => metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger));
metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger)
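A hedged usage sketch of the converted peekLastCompressedUpdate contract; the doc id is a placeholder. When the last pack is broken or finalised the update comes back as null, but the version is still passed so callers can check consistency.

const MongoManager = require("./MongoManager");

// Sketch only: the doc id is a placeholder, not from the commit.
MongoManager.peekLastCompressedUpdate("507f1f77bcf86cd799439011", function(error, update, version) {
  if (error != null) { return console.error(error); }
  if (update == null) {
    // broken or finalised pack (or pack already archived to S3):
    // start a new pack, but `version` still says where history left off
    console.log("force a new pack after version", version);
  } else {
    console.log("can append to the open pack at version", version);
  }
});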

File diff suppressed because it is too large


@ -1,139 +1,183 @@
Settings = require "settings-sharelatex" /*
async = require "async" * decaffeinate suggestions:
_ = require "underscore" * DS101: Remove unnecessary use of Array.from
{db, ObjectId, BSON} = require "./mongojs" * DS102: Remove unnecessary code created because of implicit returns
fs = require "fs" * DS103: Rewrite code to no longer use __guard__
Metrics = require "metrics-sharelatex" * DS205: Consider reworking code to avoid use of IIFEs
Metrics.initialize("track-changes") * DS207: Consider shorter variations of null checks
logger = require "logger-sharelatex" * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
logger.initialize("track-changes-packworker") */
if Settings.sentry?.dsn? let LIMIT, pending;
logger.initializeErrorReporting(Settings.sentry.dsn) let project_id, doc_id;
const Settings = require("settings-sharelatex");
const async = require("async");
const _ = require("underscore");
const {db, ObjectId, BSON} = require("./mongojs");
const fs = require("fs");
const Metrics = require("metrics-sharelatex");
Metrics.initialize("track-changes");
const logger = require("logger-sharelatex");
logger.initialize("track-changes-packworker");
if ((Settings.sentry != null ? Settings.sentry.dsn : undefined) != null) {
logger.initializeErrorReporting(Settings.sentry.dsn);
}
DAYS = 24 * 3600 * 1000 const DAYS = 24 * 3600 * 1000;
LockManager = require "./LockManager" const LockManager = require("./LockManager");
PackManager = require "./PackManager" const PackManager = require("./PackManager");
# this worker script is forked by the main process to look for // this worker script is forked by the main process to look for
# document histories which can be archived // document histories which can be archived
source = process.argv[2] const source = process.argv[2];
DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000 const DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000;
TIMEOUT = Number(process.argv[4]) || 30*60*1000 const TIMEOUT = Number(process.argv[4]) || (30*60*1000);
COUNT = 0 # number processed let COUNT = 0; // number processed
TOTAL = 0 # total number to process let TOTAL = 0; // total number to process
if !source.match(/^[0-9]+$/) if (!source.match(/^[0-9]+$/)) {
file = fs.readFileSync source const file = fs.readFileSync(source);
result = for line in file.toString().split('\n') const result = (() => {
[project_id, doc_id] = line.split(' ') const result1 = [];
{doc_id, project_id} for (let line of Array.from(file.toString().split('\n'))) {
pending = _.filter result, (row) -> row?.doc_id?.match(/^[a-f0-9]{24}$/) [project_id, doc_id] = Array.from(line.split(' '));
else result1.push({doc_id, project_id});
LIMIT = Number(process.argv[2]) || 1000 }
return result1;
})();
pending = _.filter(result, row => __guard__(row != null ? row.doc_id : undefined, x => x.match(/^[a-f0-9]{24}$/)));
} else {
LIMIT = Number(process.argv[2]) || 1000;
}
shutDownRequested = false let shutDownRequested = false;
shutDownTimer = setTimeout () -> const shutDownTimer = setTimeout(function() {
logger.log "pack timed out, requesting shutdown" logger.log("pack timed out, requesting shutdown");
# start the shutdown on the next pack // start the shutdown on the next pack
shutDownRequested = true shutDownRequested = true;
# do a hard shutdown after a further 5 minutes // do a hard shutdown after a further 5 minutes
hardTimeout = setTimeout () -> const hardTimeout = setTimeout(function() {
logger.error "HARD TIMEOUT in pack archive worker" logger.error("HARD TIMEOUT in pack archive worker");
process.exit() return process.exit();
, 5*60*1000 }
hardTimeout.unref() , 5*60*1000);
, TIMEOUT return hardTimeout.unref();
}
, TIMEOUT);
logger.log "checking for updates, limit=#{LIMIT}, delay=#{DOCUMENT_PACK_DELAY}, timeout=#{TIMEOUT}" logger.log(`checking for updates, limit=${LIMIT}, delay=${DOCUMENT_PACK_DELAY}, timeout=${TIMEOUT}`);
# workaround for https://github.com/mafintosh/mongojs/issues/224 // workaround for https://github.com/mafintosh/mongojs/issues/224
db.close = (callback) -> db.close = function(callback) {
this._getServer (err, server) -> return this._getServer(function(err, server) {
return callback(err) if err? if (err != null) { return callback(err); }
server = if server.destroy? then server else server.topology server = (server.destroy != null) ? server : server.topology;
server.destroy(true, true) server.destroy(true, true);
callback() return callback();
});
};
finish = () -> const finish = function() {
if shutDownTimer? if (shutDownTimer != null) {
logger.log 'cancelling timeout' logger.log('cancelling timeout');
clearTimeout shutDownTimer clearTimeout(shutDownTimer);
logger.log 'closing db' }
db.close () -> logger.log('closing db');
logger.log 'closing LockManager Redis Connection' return db.close(function() {
LockManager.close () -> logger.log('closing LockManager Redis Connection');
logger.log {processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker' return LockManager.close(function() {
hardTimeout = setTimeout () -> logger.log({processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker');
logger.error 'hard exit from pack archive worker' const hardTimeout = setTimeout(function() {
process.exit(1) logger.error('hard exit from pack archive worker');
, 5*1000 return process.exit(1);
hardTimeout.unref() }
, 5*1000);
return hardTimeout.unref();
});
});
};
process.on 'exit', (code) -> process.on('exit', code => logger.log({code}, 'pack archive worker exited'));
logger.log {code}, 'pack archive worker exited'
processUpdates = (pending) -> const processUpdates = pending =>
async.eachSeries pending, (result, callback) -> async.eachSeries(pending, function(result, callback) {
{_id, project_id, doc_id} = result let _id;
COUNT++ ({_id, project_id, doc_id} = result);
logger.log {project_id, doc_id}, "processing #{COUNT}/#{TOTAL}" COUNT++;
if not project_id? or not doc_id? logger.log({project_id, doc_id}, `processing ${COUNT}/${TOTAL}`);
logger.log {project_id, doc_id}, "skipping pack, missing project/doc id" if ((project_id == null) || (doc_id == null)) {
return callback() logger.log({project_id, doc_id}, "skipping pack, missing project/doc id");
handler = (err, result) -> return callback();
if err? and err.code is "InternalError" and err.retryable }
logger.warn {err, result}, "ignoring S3 error in pack archive worker" const handler = function(err, result) {
# Ignore any s3 errors due to random problems if ((err != null) && (err.code === "InternalError") && err.retryable) {
err = null logger.warn({err, result}, "ignoring S3 error in pack archive worker");
if err? // Ignore any s3 errors due to random problems
logger.error {err, result}, "error in pack archive worker" err = null;
return callback(err) }
if shutDownRequested if (err != null) {
logger.warn "shutting down pack archive worker" logger.error({err, result}, "error in pack archive worker");
return callback(new Error("shutdown")) return callback(err);
setTimeout () -> }
callback(err, result) if (shutDownRequested) {
, DOCUMENT_PACK_DELAY logger.warn("shutting down pack archive worker");
if not _id? return callback(new Error("shutdown"));
PackManager.pushOldPacks project_id, doc_id, handler }
else return setTimeout(() => callback(err, result)
PackManager.processOldPack project_id, doc_id, _id, handler , DOCUMENT_PACK_DELAY);
, (err, results) -> };
if err? and err.message != "shutdown" if ((_id == null)) {
logger.error {err}, 'error in pack archive worker processUpdates' return PackManager.pushOldPacks(project_id, doc_id, handler);
finish() } else {
return PackManager.processOldPack(project_id, doc_id, _id, handler);
}
}
, function(err, results) {
if ((err != null) && (err.message !== "shutdown")) {
logger.error({err}, 'error in pack archive worker processUpdates');
}
return finish();
})
;
# find the packs which can be archived // find the packs which can be archived
ObjectIdFromDate = (date) -> const ObjectIdFromDate = function(date) {
id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000"; const id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000";
return ObjectId(id) return ObjectId(id);
};
# new approach, two passes // new approach, two passes
# find packs to be marked as finalised:true, those which have a newer pack present // find packs to be marked as finalised:true, those which have a newer pack present
# then only consider finalised:true packs for archiving // then only consider finalised:true packs for archiving
if pending? if (pending != null) {
logger.log "got #{pending.length} entries from #{source}" logger.log(`got ${pending.length} entries from ${source}`);
processUpdates pending processUpdates(pending);
else } else {
oneWeekAgo = new Date(Date.now() - 7 * DAYS) const oneWeekAgo = new Date(Date.now() - (7 * DAYS));
db.docHistory.find({ db.docHistory.find({
expiresAt: {$exists: false} expiresAt: {$exists: false},
project_id: {$exists: true} project_id: {$exists: true},
v_end: {$exists: true} v_end: {$exists: true},
_id: {$lt: ObjectIdFromDate(oneWeekAgo)} _id: {$lt: ObjectIdFromDate(oneWeekAgo)},
last_checked: {$lt: oneWeekAgo} last_checked: {$lt: oneWeekAgo}
}, {_id:1, doc_id:1, project_id:1}).sort({ }, {_id:1, doc_id:1, project_id:1}).sort({
last_checked:1 last_checked:1
}).limit LIMIT, (err, results) -> }).limit(LIMIT, function(err, results) {
if err? if (err != null) {
logger.log {err}, 'error checking for updates' logger.log({err}, 'error checking for updates');
finish() finish();
return return;
pending = _.uniq results, false, (result) -> result.doc_id.toString() }
TOTAL = pending.length pending = _.uniq(results, false, result => result.doc_id.toString());
logger.log "found #{TOTAL} documents to archive" TOTAL = pending.length;
processUpdates pending logger.log(`found ${TOTAL} documents to archive`);
return processUpdates(pending);
});
}
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
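A hedged sketch of the ObjectIdFromDate trick used by the worker above; the helper is copied from the code, the query shape is illustrative. ObjectIds begin with a 4-byte creation timestamp, so a synthetic id padded with zeros compares below every id generated after the cut-off date.

const { db, ObjectId } = require("./mongojs");

const DAYS = 24 * 3600 * 1000;
const ObjectIdFromDate = date =>
  ObjectId(Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000");

// Sketch only: select docHistory entries created more than a week ago,
// the same shape of query the worker runs above.
const oneWeekAgo = new Date(Date.now() - (7 * DAYS));
db.docHistory.find({ _id: { $lt: ObjectIdFromDate(oneWeekAgo) } }, { _id: 1 })
  .limit(10, (err, results) => console.log(err || results.length));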


@ -1,62 +1,84 @@
Heap = require "heap" /*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let ProjectIterator;
const Heap = require("heap");
module.exports = ProjectIterator = module.exports = (ProjectIterator =
class ProjectIterator (ProjectIterator = class ProjectIterator {
constructor: (packs, @before, @getPackByIdFn) -> constructor(packs, before, getPackByIdFn) {
byEndTs = (a,b) -> (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex) this.before = before;
@packs = packs.slice().sort byEndTs this.getPackByIdFn = getPackByIdFn;
@queue = new Heap(byEndTs) const byEndTs = (a,b) => (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex);
this.packs = packs.slice().sort(byEndTs);
this.queue = new Heap(byEndTs);
}
next: (callback) -> next(callback) {
# what's up next // what's up next
#console.log ">>> top item", iterator.packs[0] //console.log ">>> top item", iterator.packs[0]
iterator = this const iterator = this;
before = @before const { before } = this;
queue = iterator.queue const { queue } = iterator;
opsToReturn = [] const opsToReturn = [];
nextPack = iterator.packs[0] let nextPack = iterator.packs[0];
lowWaterMark = nextPack?.meta.end_ts || 0 let lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
nextItem = queue.peek() let nextItem = queue.peek();
#console.log "queue empty?", queue.empty() //console.log "queue empty?", queue.empty()
#console.log "nextItem", nextItem //console.log "nextItem", nextItem
#console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts //console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts
#console.log "lowWaterMark", lowWaterMark //console.log "lowWaterMark", lowWaterMark
while before? and nextPack?.meta.start_ts > before while ((before != null) && ((nextPack != null ? nextPack.meta.start_ts : undefined) > before)) {
# discard pack that is outside range // discard pack that is outside range
iterator.packs.shift() iterator.packs.shift();
nextPack = iterator.packs[0] nextPack = iterator.packs[0];
lowWaterMark = nextPack?.meta.end_ts || 0 lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
}
if (queue.empty() or nextItem?.meta.end_ts <= lowWaterMark) and nextPack? if ((queue.empty() || ((nextItem != null ? nextItem.meta.end_ts : undefined) <= lowWaterMark)) && (nextPack != null)) {
# retrieve the next pack and populate the queue // retrieve the next pack and populate the queue
return @getPackByIdFn nextPack.project_id, nextPack.doc_id, nextPack._id, (err, pack) -> return this.getPackByIdFn(nextPack.project_id, nextPack.doc_id, nextPack._id, function(err, pack) {
return callback(err) if err? if (err != null) { return callback(err); }
iterator.packs.shift() # have now retrieved this pack, remove it iterator.packs.shift(); // have now retrieved this pack, remove it
#console.log "got pack", pack //console.log "got pack", pack
for op in pack.pack when (not before? or op.meta.end_ts < before) for (let op of Array.from(pack.pack)) {
#console.log "adding op", op //console.log "adding op", op
op.doc_id = nextPack.doc_id if ((before == null) || (op.meta.end_ts < before)) {
op.project_id = nextPack.project_id op.doc_id = nextPack.doc_id;
queue.push op op.project_id = nextPack.project_id;
# now try again queue.push(op);
return iterator.next(callback) }
}
// now try again
return iterator.next(callback);
});
}
#console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark //console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark
while nextItem? and (nextItem?.meta.end_ts > lowWaterMark) while ((nextItem != null) && ((nextItem != null ? nextItem.meta.end_ts : undefined) > lowWaterMark)) {
opsToReturn.push nextItem opsToReturn.push(nextItem);
queue.pop() queue.pop();
nextItem = queue.peek() nextItem = queue.peek();
}
#console.log "queue empty?", queue.empty() //console.log "queue empty?", queue.empty()
#console.log "nextPack", nextPack? //console.log "nextPack", nextPack?
if queue.empty() and not nextPack? # got everything if (queue.empty() && (nextPack == null)) { // got everything
iterator._done = true iterator._done = true;
}
callback(null, opsToReturn) return callback(null, opsToReturn);
}
done: () -> done() {
return @_done return this._done;
}
}));
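A hedged usage sketch for the converted ProjectIterator; the empty pack list and the stubbed getPackById function are placeholders. The iterator is driven by calling next() until done() reports that both the pack list and the heap have been drained.

const ProjectIterator = require("./ProjectIterator");

// Sketch only: packs would normally come from the docHistory index, and
// getPackById would fetch a pack from Mongo or S3.
const packs = [];
const getPackById = (project_id, doc_id, pack_id, cb) => cb(null, { pack: [] });

const iterator = new ProjectIterator(packs, Date.now(), getPackById);
const drain = () =>
  iterator.next(function(err, ops) {
    if (err != null) { return console.error(err); }
    console.log("got", ops.length, "ops in this batch");
    if (!iterator.done()) { drain(); }
  });
drain();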


@ -1,80 +1,121 @@
Settings = require "settings-sharelatex" /*
redis = require("redis-sharelatex") * decaffeinate suggestions:
rclient = redis.createClient(Settings.redis.history) * DS101: Remove unnecessary use of Array.from
Keys = Settings.redis.history.key_schema * DS102: Remove unnecessary code created because of implicit returns
async = require "async" * DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RedisManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.history);
const Keys = Settings.redis.history.key_schema;
const async = require("async");
module.exports = RedisManager = module.exports = (RedisManager = {
getOldestDocUpdates: (doc_id, batchSize, callback = (error, jsonUpdates) ->) -> getOldestDocUpdates(doc_id, batchSize, callback) {
key = Keys.uncompressedHistoryOps({doc_id}) if (callback == null) { callback = function(error, jsonUpdates) {}; }
rclient.lrange key, 0, batchSize - 1, callback const key = Keys.uncompressedHistoryOps({doc_id});
return rclient.lrange(key, 0, batchSize - 1, callback);
},
expandDocUpdates: (jsonUpdates, callback = (error, rawUpdates) ->) -> expandDocUpdates(jsonUpdates, callback) {
try let rawUpdates;
rawUpdates = ( JSON.parse(update) for update in jsonUpdates or [] ) if (callback == null) { callback = function(error, rawUpdates) {}; }
catch e try {
return callback(e) rawUpdates = ( Array.from(jsonUpdates || []).map((update) => JSON.parse(update)) );
callback null, rawUpdates } catch (e) {
return callback(e);
}
return callback(null, rawUpdates);
},
deleteAppliedDocUpdates: (project_id, doc_id, docUpdates, callback = (error) ->) -> deleteAppliedDocUpdates(project_id, doc_id, docUpdates, callback) {
multi = rclient.multi() if (callback == null) { callback = function(error) {}; }
# Delete all the updates which have been applied (exact match) const multi = rclient.multi();
for update in docUpdates or [] // Delete all the updates which have been applied (exact match)
multi.lrem Keys.uncompressedHistoryOps({doc_id}), 1, update for (let update of Array.from(docUpdates || [])) {
multi.exec (error, results) -> multi.lrem(Keys.uncompressedHistoryOps({doc_id}), 1, update);
return callback(error) if error? }
# It's ok to delete the doc_id from the set here. Even though the list return multi.exec(function(error, results) {
# of updates may not be empty, we will continue to process it until it is. if (error != null) { return callback(error); }
rclient.srem Keys.docsWithHistoryOps({project_id}), doc_id, (error) -> // It's ok to delete the doc_id from the set here. Even though the list
return callback(error) if error? // of updates may not be empty, we will continue to process it until it is.
callback null return rclient.srem(Keys.docsWithHistoryOps({project_id}), doc_id, function(error) {
if (error != null) { return callback(error); }
return callback(null);
});
});
},
getDocIdsWithHistoryOps: (project_id, callback = (error, doc_ids) ->) -> getDocIdsWithHistoryOps(project_id, callback) {
rclient.smembers Keys.docsWithHistoryOps({project_id}), callback if (callback == null) { callback = function(error, doc_ids) {}; }
return rclient.smembers(Keys.docsWithHistoryOps({project_id}), callback);
},
# iterate over keys asynchronously using redis scan (non-blocking) // iterate over keys asynchronously using redis scan (non-blocking)
# handle all the cluster nodes or single redis server // handle all the cluster nodes or single redis server
_getKeys: (pattern, callback) -> _getKeys(pattern, callback) {
nodes = rclient.nodes?('master') || [ rclient ]; const nodes = (typeof rclient.nodes === 'function' ? rclient.nodes('master') : undefined) || [ rclient ];
doKeyLookupForNode = (node, cb) -> const doKeyLookupForNode = (node, cb) => RedisManager._getKeysFromNode(node, pattern, cb);
RedisManager._getKeysFromNode node, pattern, cb return async.concatSeries(nodes, doKeyLookupForNode, callback);
async.concatSeries nodes, doKeyLookupForNode, callback },
_getKeysFromNode: (node, pattern, callback) -> _getKeysFromNode(node, pattern, callback) {
cursor = 0 # redis iterator let cursor = 0; // redis iterator
keySet = {} # use hash to avoid duplicate results const keySet = {}; // use hash to avoid duplicate results
# scan over all keys looking for pattern // scan over all keys looking for pattern
doIteration = (cb) -> var doIteration = cb =>
node.scan cursor, "MATCH", pattern, "COUNT", 1000, (error, reply) -> node.scan(cursor, "MATCH", pattern, "COUNT", 1000, function(error, reply) {
return callback(error) if error? let keys;
[cursor, keys] = reply if (error != null) { return callback(error); }
for key in keys [cursor, keys] = Array.from(reply);
keySet[key] = true for (let key of Array.from(keys)) {
if cursor == '0' # note redis returns string result not numeric keySet[key] = true;
return callback(null, Object.keys(keySet)) }
else if (cursor === '0') { // note redis returns string result not numeric
doIteration() return callback(null, Object.keys(keySet));
doIteration() } else {
return doIteration();
}
})
;
return doIteration();
},
# extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b // extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
# or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster) // or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
_extractIds: (keyList) -> _extractIds(keyList) {
ids = for key in keyList const ids = (() => {
m = key.match(/:\{?([0-9a-f]{24})\}?/) # extract object id const result = [];
m[1] for (let key of Array.from(keyList)) {
return ids const m = key.match(/:\{?([0-9a-f]{24})\}?/); // extract object id
result.push(m[1]);
}
return result;
})();
return ids;
},
getProjectIdsWithHistoryOps: (callback = (error, project_ids) ->) -> getProjectIdsWithHistoryOps(callback) {
RedisManager._getKeys Keys.docsWithHistoryOps({project_id:"*"}), (error, project_keys) -> if (callback == null) { callback = function(error, project_ids) {}; }
return callback(error) if error? return RedisManager._getKeys(Keys.docsWithHistoryOps({project_id:"*"}), function(error, project_keys) {
project_ids = RedisManager._extractIds project_keys if (error != null) { return callback(error); }
callback(error, project_ids) const project_ids = RedisManager._extractIds(project_keys);
return callback(error, project_ids);
});
},
getAllDocIdsWithHistoryOps: (callback = (error, doc_ids) ->) -> getAllDocIdsWithHistoryOps(callback) {
# return all the docids, to find dangling history entries after // return all the docids, to find dangling history entries after
# everything is flushed. // everything is flushed.
RedisManager._getKeys Keys.uncompressedHistoryOps({doc_id:"*"}), (error, doc_keys) -> if (callback == null) { callback = function(error, doc_ids) {}; }
return callback(error) if error? return RedisManager._getKeys(Keys.uncompressedHistoryOps({doc_id:"*"}), function(error, doc_keys) {
doc_ids = RedisManager._extractIds doc_keys if (error != null) { return callback(error); }
callback(error, doc_ids) const doc_ids = RedisManager._extractIds(doc_keys);
return callback(error, doc_ids);
});
}
});
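A hedged usage sketch of the SCAN-based helpers above; the result is only logged. _getKeys walks every cluster node (or the single client) with non-blocking SCAN ... MATCH calls, and _extractIds then pulls the 24-character hex id out of each key, with or without the {...} cluster hash tags.

const RedisManager = require("./RedisManager");

// Sketch only: list the projects that still have queued history ops.
RedisManager.getProjectIdsWithHistoryOps(function(error, project_ids) {
  if (error != null) { return console.error(error); }
  console.log("projects with pending history ops:", project_ids);
});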


@ -1,12 +1,24 @@
DocumentUpdaterManager = require "./DocumentUpdaterManager" /*
DiffManager = require "./DiffManager" * decaffeinate suggestions:
logger = require "logger-sharelatex" * DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RestoreManager;
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffManager = require("./DiffManager");
const logger = require("logger-sharelatex");
module.exports = RestoreManager = module.exports = (RestoreManager = {
restoreToBeforeVersion: (project_id, doc_id, version, user_id, callback = (error) ->) -> restoreToBeforeVersion(project_id, doc_id, version, user_id, callback) {
logger.log project_id: project_id, doc_id: doc_id, version: version, user_id: user_id, "restoring document" if (callback == null) { callback = function(error) {}; }
DiffManager.getDocumentBeforeVersion project_id, doc_id, version, (error, content) -> logger.log({project_id, doc_id, version, user_id}, "restoring document");
return callback(error) if error? return DiffManager.getDocumentBeforeVersion(project_id, doc_id, version, function(error, content) {
DocumentUpdaterManager.setDocument project_id, doc_id, content, user_id, (error) -> if (error != null) { return callback(error); }
return callback(error) if error? return DocumentUpdaterManager.setDocument(project_id, doc_id, content, user_id, function(error) {
callback() if (error != null) { return callback(error); }
return callback();
});
});
}
});
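A hedged usage sketch of the converted RestoreManager; the ids and the version number are placeholders. DiffManager rebuilds the content as it was before the given version and DocumentUpdaterManager writes it back.

const RestoreManager = require("./RestoreManager");

// Sketch only: all ids below are placeholders.
const project_id = "507f191e810c19729de860ea";
const doc_id = "507f191e810c19729de860eb";
const user_id = "507f191e810c19729de860ec";

RestoreManager.restoreToBeforeVersion(project_id, doc_id, 42, user_id, function(error) {
  if (error != null) { return console.error(error); }
  console.log("document restored to its state before version 42");
});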


@ -1,218 +1,278 @@
strInject = (s1, pos, s2) -> s1[...pos] + s2 + s1[pos..] /*
strRemove = (s1, pos, length) -> s1[...pos] + s1[(pos + length)..] * decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let oneMinute, twoMegabytes, UpdateCompressor;
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos);
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice((pos + length));
diff_match_patch = require("../lib/diff_match_patch").diff_match_patch const { diff_match_patch } = require("../lib/diff_match_patch");
dmp = new diff_match_patch() const dmp = new diff_match_patch();
module.exports = UpdateCompressor = module.exports = (UpdateCompressor = {
NOOP: "noop" NOOP: "noop",
# Updates come from the doc updater in format // Updates come from the doc updater in format
# { // {
# op: [ { ... op1 ... }, { ... op2 ... } ] // op: [ { ... op1 ... }, { ... op2 ... } ]
# meta: { ts: ..., user_id: ... } // meta: { ts: ..., user_id: ... }
# } // }
# but it's easier to work with one op per update, so convert these updates to // but it's easier to work with one op per update, so convert these updates to
# our compressed format // our compressed format
# [{ // [{
# op: op1 // op: op1
# meta: { start_ts: ... , end_ts: ..., user_id: ... } // meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }, { // }, {
# op: op2 // op: op2
# meta: { start_ts: ... , end_ts: ..., user_id: ... } // meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }] // }]
convertToSingleOpUpdates: (updates) -> convertToSingleOpUpdates(updates) {
splitUpdates = [] const splitUpdates = [];
for update in updates for (let update of Array.from(updates)) {
# Reject any non-insert or delete ops, i.e. comments // Reject any non-insert or delete ops, i.e. comments
ops = update.op.filter (o) -> o.i? or o.d? const ops = update.op.filter(o => (o.i != null) || (o.d != null));
if ops.length == 0 if (ops.length === 0) {
splitUpdates.push splitUpdates.push({
op: UpdateCompressor.NOOP op: UpdateCompressor.NOOP,
meta: meta: {
start_ts: update.meta.start_ts or update.meta.ts start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts or update.meta.ts end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id user_id: update.meta.user_id
},
v: update.v v: update.v
else });
for op in ops } else {
splitUpdates.push for (let op of Array.from(ops)) {
op: op splitUpdates.push({
meta: op,
start_ts: update.meta.start_ts or update.meta.ts meta: {
end_ts: update.meta.end_ts or update.meta.ts start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id user_id: update.meta.user_id
},
v: update.v v: update.v
return splitUpdates });
}
}
}
return splitUpdates;
},
concatUpdatesWithSameVersion: (updates) -> concatUpdatesWithSameVersion(updates) {
concattedUpdates = [] const concattedUpdates = [];
for update in updates for (let update of Array.from(updates)) {
lastUpdate = concattedUpdates[concattedUpdates.length - 1] const lastUpdate = concattedUpdates[concattedUpdates.length - 1];
if lastUpdate? and lastUpdate.v == update.v if ((lastUpdate != null) && (lastUpdate.v === update.v)) {
lastUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP if (update.op !== UpdateCompressor.NOOP) { lastUpdate.op.push(update.op); }
else } else {
nextUpdate = const nextUpdate = {
op: [] op: [],
meta: update.meta meta: update.meta,
v: update.v v: update.v
nextUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP };
concattedUpdates.push nextUpdate if (update.op !== UpdateCompressor.NOOP) { nextUpdate.op.push(update.op); }
return concattedUpdates concattedUpdates.push(nextUpdate);
}
}
return concattedUpdates;
},
compressRawUpdates: (lastPreviousUpdate, rawUpdates) -> compressRawUpdates(lastPreviousUpdate, rawUpdates) {
if lastPreviousUpdate?.op?.length > 1 if (__guard__(lastPreviousUpdate != null ? lastPreviousUpdate.op : undefined, x => x.length) > 1) {
# if the last previous update was an array op, don't compress onto it. // if the last previous update was an array op, don't compress onto it.
# This avoids cases where array length changes but version number doesn't // This avoids cases where array length changes but version number doesn't
return [lastPreviousUpdate].concat UpdateCompressor.compressRawUpdates(null,rawUpdates) return [lastPreviousUpdate].concat(UpdateCompressor.compressRawUpdates(null,rawUpdates));
if lastPreviousUpdate? }
rawUpdates = [lastPreviousUpdate].concat(rawUpdates) if (lastPreviousUpdate != null) {
updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates) rawUpdates = [lastPreviousUpdate].concat(rawUpdates);
updates = UpdateCompressor.compressUpdates(updates) }
return UpdateCompressor.concatUpdatesWithSameVersion(updates) let updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates);
updates = UpdateCompressor.compressUpdates(updates);
return UpdateCompressor.concatUpdatesWithSameVersion(updates);
},
compressUpdates: (updates) -> compressUpdates(updates) {
return [] if updates.length == 0 if (updates.length === 0) { return []; }
compressedUpdates = [updates.shift()] let compressedUpdates = [updates.shift()];
for update in updates for (let update of Array.from(updates)) {
lastCompressedUpdate = compressedUpdates.pop() const lastCompressedUpdate = compressedUpdates.pop();
if lastCompressedUpdate? if (lastCompressedUpdate != null) {
compressedUpdates = compressedUpdates.concat UpdateCompressor._concatTwoUpdates lastCompressedUpdate, update compressedUpdates = compressedUpdates.concat(UpdateCompressor._concatTwoUpdates(lastCompressedUpdate, update));
else } else {
compressedUpdates.push update compressedUpdates.push(update);
}
}
return compressedUpdates return compressedUpdates;
},
MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000 MAX_TIME_BETWEEN_UPDATES: (oneMinute = 60 * 1000),
MAX_UPDATE_SIZE: twoMegabytes = 2* 1024 * 1024 MAX_UPDATE_SIZE: (twoMegabytes = 2* 1024 * 1024),
_concatTwoUpdates: (firstUpdate, secondUpdate) -> _concatTwoUpdates(firstUpdate, secondUpdate) {
firstUpdate = let offset;
op: firstUpdate.op firstUpdate = {
meta: op: firstUpdate.op,
user_id: firstUpdate.meta.user_id or null meta: {
start_ts: firstUpdate.meta.start_ts or firstUpdate.meta.ts user_id: firstUpdate.meta.user_id || null,
end_ts: firstUpdate.meta.end_ts or firstUpdate.meta.ts start_ts: firstUpdate.meta.start_ts || firstUpdate.meta.ts,
end_ts: firstUpdate.meta.end_ts || firstUpdate.meta.ts
},
v: firstUpdate.v v: firstUpdate.v
secondUpdate = };
op: secondUpdate.op secondUpdate = {
meta: op: secondUpdate.op,
user_id: secondUpdate.meta.user_id or null meta: {
start_ts: secondUpdate.meta.start_ts or secondUpdate.meta.ts user_id: secondUpdate.meta.user_id || null,
end_ts: secondUpdate.meta.end_ts or secondUpdate.meta.ts start_ts: secondUpdate.meta.start_ts || secondUpdate.meta.ts,
end_ts: secondUpdate.meta.end_ts || secondUpdate.meta.ts
},
v: secondUpdate.v v: secondUpdate.v
};
if firstUpdate.meta.user_id != secondUpdate.meta.user_id if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
return [firstUpdate, secondUpdate] return [firstUpdate, secondUpdate];
}
if secondUpdate.meta.start_ts - firstUpdate.meta.end_ts > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES if ((secondUpdate.meta.start_ts - firstUpdate.meta.end_ts) > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES) {
return [firstUpdate, secondUpdate] return [firstUpdate, secondUpdate];
}
firstOp = firstUpdate.op const firstOp = firstUpdate.op;
secondOp = secondUpdate.op const secondOp = secondUpdate.op;
firstSize = firstOp.i?.length or firstOp.d?.length const firstSize = (firstOp.i != null ? firstOp.i.length : undefined) || (firstOp.d != null ? firstOp.d.length : undefined);
secondSize = secondOp.i?.length or secondOp.d?.length const secondSize = (secondOp.i != null ? secondOp.i.length : undefined) || (secondOp.d != null ? secondOp.d.length : undefined);
# Two inserts // Two inserts
if firstOp.i? and secondOp.i? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE if ((firstOp.i != null) && (secondOp.i != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
return [ return [{
meta: meta: {
start_ts: firstUpdate.meta.start_ts start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id user_id: firstUpdate.meta.user_id
op: },
p: firstOp.p op: {
p: firstOp.p,
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i) i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
},
v: secondUpdate.v v: secondUpdate.v
] }
# Two deletes ];
else if firstOp.d? and secondOp.d? and secondOp.p <= firstOp.p <= (secondOp.p + secondOp.d.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE // Two deletes
return [ } else if ((firstOp.d != null) && (secondOp.d != null) && (secondOp.p <= firstOp.p && firstOp.p <= (secondOp.p + secondOp.d.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
meta: return [{
start_ts: firstUpdate.meta.start_ts meta: {
end_ts: secondUpdate.meta.end_ts start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id user_id: firstUpdate.meta.user_id
op: },
p: secondOp.p op: {
p: secondOp.p,
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d) d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
},
v: secondUpdate.v v: secondUpdate.v
] }
# An insert and then a delete ];
else if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length) // An insert and then a delete
offset = secondOp.p - firstOp.p } else if ((firstOp.i != null) && (secondOp.d != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length))) {
insertedText = firstOp.i.slice(offset, offset + secondOp.d.length) offset = secondOp.p - firstOp.p;
# Only trim the insert when the delete is fully contained within it const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length);
if insertedText == secondOp.d // Only trim the insert when the delete is fully contained within it
insert = strRemove(firstOp.i, offset, secondOp.d.length) if (insertedText === secondOp.d) {
return [ const insert = strRemove(firstOp.i, offset, secondOp.d.length);
meta: return [{
start_ts: firstUpdate.meta.start_ts meta: {
end_ts: secondUpdate.meta.end_ts start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id user_id: firstUpdate.meta.user_id
op: },
p: firstOp.p op: {
p: firstOp.p,
i: insert i: insert
},
v: secondUpdate.v v: secondUpdate.v
] }
else ];
# This will only happen if the delete extends outside the insert } else {
return [firstUpdate, secondUpdate] // This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate];
}
# A delete then an insert at the same place, likely a copy-paste of a chunk of content // A delete then an insert at the same place, likely a copy-paste of a chunk of content
else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p } else if ((firstOp.d != null) && (secondOp.i != null) && (firstOp.p === secondOp.p)) {
offset = firstOp.p offset = firstOp.p;
diff_ops = @diffAsShareJsOps(firstOp.d, secondOp.i) const diff_ops = this.diffAsShareJsOps(firstOp.d, secondOp.i);
if diff_ops.length == 0 if (diff_ops.length === 0) {
return [{ # Noop return [{ // Noop
meta: meta: {
start_ts: firstUpdate.meta.start_ts start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id user_id: firstUpdate.meta.user_id
op: },
p: firstOp.p op: {
p: firstOp.p,
i: "" i: ""
},
v: secondUpdate.v v: secondUpdate.v
}] }];
else } else {
return diff_ops.map (op) -> return diff_ops.map(function(op) {
op.p += offset op.p += offset;
return { return {
meta: meta: {
start_ts: firstUpdate.meta.start_ts start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id user_id: firstUpdate.meta.user_id
op: op },
op,
v: secondUpdate.v v: secondUpdate.v
} };});
}
else } else {
return [firstUpdate, secondUpdate] return [firstUpdate, secondUpdate];
}
},
ADDED: 1 ADDED: 1,
REMOVED: -1 REMOVED: -1,
UNCHANGED: 0 UNCHANGED: 0,
diffAsShareJsOps: (before, after, callback = (error, ops) ->) -> diffAsShareJsOps(before, after, callback) {
diffs = dmp.diff_main(before, after) if (callback == null) { callback = function(error, ops) {}; }
dmp.diff_cleanupSemantic(diffs) const diffs = dmp.diff_main(before, after);
dmp.diff_cleanupSemantic(diffs);
ops = [] const ops = [];
position = 0 let position = 0;
for diff in diffs for (let diff of Array.from(diffs)) {
type = diff[0] const type = diff[0];
content = diff[1] const content = diff[1];
if type == @ADDED if (type === this.ADDED) {
ops.push ops.push({
i: content i: content,
p: position p: position
position += content.length });
else if type == @REMOVED position += content.length;
ops.push } else if (type === this.REMOVED) {
d: content ops.push({
d: content,
p: position p: position
else if type == @UNCHANGED });
position += content.length } else if (type === this.UNCHANGED) {
else position += content.length;
throw "Unknown type" } else {
return ops throw "Unknown type";
}
}
return ops;
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
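A hedged worked example of the two-inserts branch above; the timestamps, user id and versions are made up. Two inserts by the same user, within MAX_TIME_BETWEEN_UPDATES and touching adjacent positions, collapse into a single op via strInject.

const UpdateCompressor = require("./UpdateCompressor");

// Sketch only: values are illustrative.
const ts = Date.now();
const rawUpdates = [
  { op: [{ i: "foo", p: 3 }], meta: { ts, user_id: "u1" }, v: 41 },
  { op: [{ i: "bar", p: 6 }], meta: { ts: ts + 1000, user_id: "u1" }, v: 42 }
];
const compressed = UpdateCompressor.compressRawUpdates(null, rawUpdates);
// expected shape: one update with op [{ i: "foobar", p: 3 }] and v: 42
console.log(JSON.stringify(compressed));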


@ -1,23 +1,44 @@
MongoManager = require "./MongoManager" /*
WebApiManager = require "./WebApiManager" * decaffeinate suggestions:
logger = require "logger-sharelatex" * DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let UpdateTrimmer;
const MongoManager = require("./MongoManager");
const WebApiManager = require("./WebApiManager");
const logger = require("logger-sharelatex");
module.exports = UpdateTrimmer = module.exports = (UpdateTrimmer = {
shouldTrimUpdates: (project_id, callback = (error, shouldTrim) ->) -> shouldTrimUpdates(project_id, callback) {
MongoManager.getProjectMetaData project_id, (error, metadata) -> if (callback == null) { callback = function(error, shouldTrim) {}; }
return callback(error) if error? return MongoManager.getProjectMetaData(project_id, function(error, metadata) {
if metadata?.preserveHistory if (error != null) { return callback(error); }
return callback null, false if (metadata != null ? metadata.preserveHistory : undefined) {
else return callback(null, false);
WebApiManager.getProjectDetails project_id, (error, details) -> } else {
return callback(error) if error? return WebApiManager.getProjectDetails(project_id, function(error, details) {
logger.log project_id: project_id, details: details, "got details" if (error != null) { return callback(error); }
if details?.features?.versioning logger.log({project_id, details}, "got details");
MongoManager.setProjectMetaData project_id, preserveHistory: true, (error) -> if (__guard__(details != null ? details.features : undefined, x => x.versioning)) {
return callback(error) if error? return MongoManager.setProjectMetaData(project_id, {preserveHistory: true}, function(error) {
MongoManager.upgradeHistory project_id, (error) -> if (error != null) { return callback(error); }
return callback(error) if error? return MongoManager.upgradeHistory(project_id, function(error) {
callback null, false if (error != null) { return callback(error); }
else return callback(null, false);
callback null, true });
});
} else {
return callback(null, true);
}
});
}
});
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
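A hedged usage sketch of the converted UpdateTrimmer; the project id is a placeholder. shouldTrim comes back true only for projects without the versioning feature, whose history is therefore kept temporarily.

const UpdateTrimmer = require("./UpdateTrimmer");

// Sketch only: the project id is a placeholder.
UpdateTrimmer.shouldTrimUpdates("507f191e810c19729de860ea", function(error, shouldTrim) {
  if (error != null) { return console.error(error); }
  console.log(shouldTrim ? "history is temporary" : "history is preserved");
});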


@ -1,344 +1,494 @@
MongoManager = require "./MongoManager" /*
PackManager = require "./PackManager" * decaffeinate suggestions:
RedisManager = require "./RedisManager" * DS101: Remove unnecessary use of Array.from
UpdateCompressor = require "./UpdateCompressor" * DS102: Remove unnecessary code created because of implicit returns
LockManager = require "./LockManager" * DS103: Rewrite code to no longer use __guard__
WebApiManager = require "./WebApiManager" * DS205: Consider reworking code to avoid use of IIFEs
UpdateTrimmer = require "./UpdateTrimmer" * DS207: Consider shorter variations of null checks
logger = require "logger-sharelatex" * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
async = require "async" */
_ = require "underscore" let fiveMinutes, UpdatesManager;
Settings = require "settings-sharelatex" const MongoManager = require("./MongoManager");
keys = Settings.redis.lock.key_schema const PackManager = require("./PackManager");
const RedisManager = require("./RedisManager");
const UpdateCompressor = require("./UpdateCompressor");
const LockManager = require("./LockManager");
const WebApiManager = require("./WebApiManager");
const UpdateTrimmer = require("./UpdateTrimmer");
const logger = require("logger-sharelatex");
const async = require("async");
const _ = require("underscore");
const Settings = require("settings-sharelatex");
const keys = Settings.redis.lock.key_schema;
module.exports = UpdatesManager = module.exports = (UpdatesManager = {
compressAndSaveRawUpdates: (project_id, doc_id, rawUpdates, temporary, callback = (error) ->) -> compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, callback) {
length = rawUpdates.length let i;
if length == 0 if (callback == null) { callback = function(error) {}; }
return callback() const { length } = rawUpdates;
if (length === 0) {
return callback();
}
# check that ops are in the correct order // check that ops are in the correct order
for op, i in rawUpdates when i > 0 for (i = 0; i < rawUpdates.length; i++) {
thisVersion = op?.v const op = rawUpdates[i];
prevVersion = rawUpdates[i-1]?.v if (i > 0) {
if not (prevVersion < thisVersion) const thisVersion = op != null ? op.v : undefined;
logger.error project_id: project_id, doc_id: doc_id, rawUpdates:rawUpdates, temporary: temporary, thisVersion:thisVersion, prevVersion:prevVersion, "op versions out of order" const prevVersion = __guard__(rawUpdates[i-1], x => x.v);
if (!(prevVersion < thisVersion)) {
logger.error({project_id, doc_id, rawUpdates, temporary, thisVersion, prevVersion}, "op versions out of order");
}
}
}
# FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it // FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it
# CORRECTION: we do use it to log the time in case of error // CORRECTION: we do use it to log the time in case of error
MongoManager.peekLastCompressedUpdate doc_id, (error, lastCompressedUpdate, lastVersion) -> return MongoManager.peekLastCompressedUpdate(doc_id, function(error, lastCompressedUpdate, lastVersion) {
# lastCompressedUpdate is the most recent update in Mongo, and // lastCompressedUpdate is the most recent update in Mongo, and
# lastVersion is its sharejs version number. // lastVersion is its sharejs version number.
# //
# The peekLastCompressedUpdate method may pass the update back // The peekLastCompressedUpdate method may pass the update back
# as 'null' (for example if the previous compressed update has // as 'null' (for example if the previous compressed update has
# been archived). In this case it can still pass back the // been archived). In this case it can still pass back the
# lastVersion from the update to allow us to check consistency. // lastVersion from the update to allow us to check consistency.
return callback(error) if error? let op;
if (error != null) { return callback(error); }
# Ensure that raw updates start where lastVersion left off // Ensure that raw updates start where lastVersion left off
if lastVersion? if (lastVersion != null) {
discardedUpdates = [] const discardedUpdates = [];
rawUpdates = rawUpdates.slice(0) rawUpdates = rawUpdates.slice(0);
while rawUpdates[0]? and rawUpdates[0].v <= lastVersion while ((rawUpdates[0] != null) && (rawUpdates[0].v <= lastVersion)) {
discardedUpdates.push rawUpdates.shift() discardedUpdates.push(rawUpdates.shift());
if discardedUpdates.length }
logger.error project_id: project_id, doc_id: doc_id, discardedUpdates: discardedUpdates, temporary: temporary, lastVersion: lastVersion, "discarded updates already present" if (discardedUpdates.length) {
logger.error({project_id, doc_id, discardedUpdates, temporary, lastVersion}, "discarded updates already present");
}
if rawUpdates[0]? and rawUpdates[0].v != lastVersion + 1 if ((rawUpdates[0] != null) && (rawUpdates[0].v !== (lastVersion + 1))) {
ts = lastCompressedUpdate?.meta?.end_ts const ts = __guard__(lastCompressedUpdate != null ? lastCompressedUpdate.meta : undefined, x1 => x1.end_ts);
last_timestamp = if ts? then new Date(ts) else 'unknown time' const last_timestamp = (ts != null) ? new Date(ts) : 'unknown time';
error = new Error("Tried to apply raw op at version #{rawUpdates[0].v} to last compressed update with version #{lastVersion} from #{last_timestamp}") error = new Error(`Tried to apply raw op at version ${rawUpdates[0].v} to last compressed update with version ${lastVersion} from ${last_timestamp}`);
logger.error err: error, doc_id: doc_id, project_id: project_id, prev_end_ts: ts, temporary: temporary, lastCompressedUpdate: lastCompressedUpdate, "inconsistent doc versions" logger.error({err: error, doc_id, project_id, prev_end_ts: ts, temporary, lastCompressedUpdate}, "inconsistent doc versions");
if Settings.trackchanges?.continueOnError and rawUpdates[0].v > lastVersion + 1 if ((Settings.trackchanges != null ? Settings.trackchanges.continueOnError : undefined) && (rawUpdates[0].v > (lastVersion + 1))) {
# we have lost some ops - continue to write into the database, we can't recover at this point // we have lost some ops - continue to write into the database, we can't recover at this point
lastCompressedUpdate = null lastCompressedUpdate = null;
else } else {
return callback error return callback(error);
}
}
}
if rawUpdates.length == 0 if (rawUpdates.length === 0) {
return callback() return callback();
}
# some old large ops in redis need to be rejected, they predate // some old large ops in redis need to be rejected, they predate
# the size limit that now prevents them going through the system // the size limit that now prevents them going through the system
REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024 const REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024;
for rawUpdate in rawUpdates for (var rawUpdate of Array.from(rawUpdates)) {
opSizes = ((op.i?.length || op.d?.length) for op in rawUpdate?.op or []) const opSizes = ((() => {
size = _.max opSizes const result = [];
if size > REJECT_LARGE_OP_SIZE for (op of Array.from((rawUpdate != null ? rawUpdate.op : undefined) || [])) { result.push(((op.i != null ? op.i.length : undefined) || (op.d != null ? op.d.length : undefined)));
error = new Error("dropped op exceeding maximum allowed size of #{REJECT_LARGE_OP_SIZE}") }
logger.error err: error, doc_id: doc_id, project_id: project_id, size: size, rawUpdate: rawUpdate, "dropped op - too big" return result;
rawUpdate.op = [] })());
const size = _.max(opSizes);
if (size > REJECT_LARGE_OP_SIZE) {
error = new Error(`dropped op exceeding maximum allowed size of ${REJECT_LARGE_OP_SIZE}`);
logger.error({err: error, doc_id, project_id, size, rawUpdate}, "dropped op - too big");
rawUpdate.op = [];
}
}
compressedUpdates = UpdateCompressor.compressRawUpdates null, rawUpdates const compressedUpdates = UpdateCompressor.compressRawUpdates(null, rawUpdates);
PackManager.insertCompressedUpdates project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, (error, result) -> return PackManager.insertCompressedUpdates(project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, function(error, result) {
return callback(error) if error? if (error != null) { return callback(error); }
logger.log {project_id, doc_id, orig_v: lastCompressedUpdate?.v, new_v: result.v}, "inserted updates into pack" if result? if (result != null) { logger.log({project_id, doc_id, orig_v: (lastCompressedUpdate != null ? lastCompressedUpdate.v : undefined), new_v: result.v}, "inserted updates into pack"); }
callback() return callback();
});
});
},
# Check whether the updates are temporary (per-project property) // Check whether the updates are temporary (per-project property)
_prepareProjectForUpdates: (project_id, callback = (error, temporary) ->) -> _prepareProjectForUpdates(project_id, callback) {
UpdateTrimmer.shouldTrimUpdates project_id, (error, temporary) -> if (callback == null) { callback = function(error, temporary) {}; }
return callback(error) if error? return UpdateTrimmer.shouldTrimUpdates(project_id, function(error, temporary) {
callback(null, temporary) if (error != null) { return callback(error); }
return callback(null, temporary);
});
},
# Check for project id on document history (per-document property) // Check for project id on document history (per-document property)
_prepareDocForUpdates: (project_id, doc_id, callback = (error) ->) -> _prepareDocForUpdates(project_id, doc_id, callback) {
MongoManager.backportProjectId project_id, doc_id, (error) -> if (callback == null) { callback = function(error) {}; }
return callback(error) if error? return MongoManager.backportProjectId(project_id, doc_id, function(error) {
callback(null) if (error != null) { return callback(error); }
return callback(null);
});
},
  // Apply updates for specific project/doc after preparing at project and doc level
  REDIS_READ_BATCH_SIZE: 100,
  processUncompressedUpdates(project_id, doc_id, temporary, callback) {
    // get the updates as strings from redis (so we can delete them after they are applied)
    if (callback == null) { callback = function(error) {}; }
    return RedisManager.getOldestDocUpdates(doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, function(error, docUpdates) {
      if (error != null) { return callback(error); }
      const { length } = docUpdates;
      // parse the redis strings into ShareJs updates
      return RedisManager.expandDocUpdates(docUpdates, function(error, rawUpdates) {
        if (error != null) {
          logger.err({project_id, doc_id, docUpdates}, "failed to parse docUpdates");
          return callback(error);
        }
        logger.log({project_id, doc_id, rawUpdates}, "retrieved raw updates from redis");
        return UpdatesManager.compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, function(error) {
          if (error != null) { return callback(error); }
          logger.log({project_id, doc_id}, "compressed and saved doc updates");
          // delete the applied updates from redis
          return RedisManager.deleteAppliedDocUpdates(project_id, doc_id, docUpdates, function(error) {
            if (error != null) { return callback(error); }
            if (length === UpdatesManager.REDIS_READ_BATCH_SIZE) {
              // There might be more updates
              logger.log({project_id, doc_id}, "continuing processing updates");
              return setTimeout(() => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, callback), 0);
            } else {
              logger.log({project_id, doc_id}, "all raw updates processed");
              return callback();
            }
          });
        });
      });
    });
  },
  // Process updates for a doc when we flush it individually
  processUncompressedUpdatesWithLock(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
      if (error != null) { return callback(error); }
      return UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback);
    });
  },

  // Process updates for a doc when the whole project is flushed (internal method)
  _processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback) {
    if (callback == null) { callback = function(error) {}; }
    return UpdatesManager._prepareDocForUpdates(project_id, doc_id, function(error) {
      if (error != null) { return callback(error); }
      return LockManager.runWithLock(
        keys.historyLock({doc_id}),
        releaseLock => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, releaseLock),
        callback
      );
    });
  },

  // Process all updates for a project, only check project-level information once
  processUncompressedUpdatesForProject(project_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return RedisManager.getDocIdsWithHistoryOps(project_id, function(error, doc_ids) {
      if (error != null) { return callback(error); }
      return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
        const jobs = [];
        for (let doc_id of Array.from(doc_ids)) {
          (doc_id =>
            jobs.push(cb => UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, cb))
          )(doc_id);
        }
        return async.parallelLimit(jobs, 5, callback);
      });
    });
  },
  // flush all outstanding changes
  flushAll(limit, callback) {
    if (callback == null) { callback = function(error, result) {}; }
    return RedisManager.getProjectIdsWithHistoryOps(function(error, project_ids) {
      let project_id;
      if (error != null) { return callback(error); }
      logger.log({count: (project_ids != null ? project_ids.length : undefined), project_ids}, "found projects");
      const jobs = [];
      project_ids = _.shuffle(project_ids); // randomise to avoid hitting same projects each time
      const selectedProjects = limit < 0 ? project_ids : project_ids.slice(0, limit);
      for (project_id of Array.from(selectedProjects)) {
        (project_id =>
          jobs.push(cb =>
            UpdatesManager.processUncompressedUpdatesForProject(project_id, err => cb(null, {failed: (err != null), project_id}))
          )
        )(project_id);
      }
      return async.series(jobs, function(error, result) {
        let x;
        if (error != null) { return callback(error); }
        const failedProjects = ((() => {
          const result1 = [];
          for (x of Array.from(result)) {
            if (x.failed) {
              result1.push(x.project_id);
            }
          }
          return result1;
        })());
        const succeededProjects = ((() => {
          const result2 = [];
          for (x of Array.from(result)) {
            if (!x.failed) {
              result2.push(x.project_id);
            }
          }
          return result2;
        })());
        return callback(null, {failed: failedProjects, succeeded: succeededProjects, all: project_ids});
      });
    });
  },
  getDanglingUpdates(callback) {
    if (callback == null) { callback = function(error, doc_ids) {}; }
    return RedisManager.getAllDocIdsWithHistoryOps(function(error, all_doc_ids) {
      if (error != null) { return callback(error); }
      return RedisManager.getProjectIdsWithHistoryOps(function(error, all_project_ids) {
        if (error != null) { return callback(error); }
        // function to get doc_ids for each project
        const task = cb => async.concatSeries(all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb);
        // find the dangling doc ids
        return task(function(error, project_doc_ids) {
          const dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids);
          logger.log({all_doc_ids, all_project_ids, project_doc_ids, dangling_doc_ids}, "checking for dangling doc ids");
          return callback(null, dangling_doc_ids);
        });
      });
    });
  },

  getDocUpdates(project_id, doc_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
      if (error != null) { return callback(error); }
      //console.log "options", options
      return PackManager.getOpsByVersionRange(project_id, doc_id, options.from, options.to, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, updates);
      });
    });
  },

  getDocUpdatesWithUserInfo(project_id, doc_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    return UpdatesManager.getDocUpdates(project_id, doc_id, options, function(error, updates) {
      if (error != null) { return callback(error); }
      return UpdatesManager.fillUserInfo(updates, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, updates);
      });
    });
  },
  getSummarizedProjectUpdates(project_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    if (!options.min_count) { options.min_count = 25; }
    let summarizedUpdates = [];
    const { before } = options;
    let nextBeforeTimestamp = null;
    return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
      if (error != null) { return callback(error); }
      return PackManager.makeProjectIterator(project_id, before, function(err, iterator) {
        if (err != null) { return callback(err); }
        // repeatedly get updates and pass them through the summariser to get a final output with user info
        return async.whilst(() =>
          //console.log "checking iterator.done", iterator.done()
          (summarizedUpdates.length < options.min_count) && !iterator.done()

        , cb =>
          iterator.next(function(err, partialUpdates) {
            if (err != null) { return callback(err); }
            //logger.log {partialUpdates}, 'got partialUpdates'
            if (partialUpdates.length === 0) { return cb(); } //# FIXME should try to avoid this happening
            nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts;
            // add the updates to the summary list
            summarizedUpdates = UpdatesManager._summarizeUpdates(partialUpdates, summarizedUpdates);
            return cb();
          })

        , () =>
          // finally done all updates
          //console.log 'summarized Updates', summarizedUpdates
          UpdatesManager.fillSummarizedUserInfo(summarizedUpdates, function(err, results) {
            if (err != null) { return callback(err); }
            return callback(null, results, !iterator.done() ? nextBeforeTimestamp : undefined);
          })
        );
      });
    });
  },
  fetchUserInfo(users, callback) {
    if (callback == null) { callback = function(error, fetchedUserInfo) {}; }
    const jobs = [];
    const fetchedUserInfo = {};
    for (let user_id in users) {
      (user_id =>
        jobs.push(callback =>
          WebApiManager.getUserInfo(user_id, function(error, userInfo) {
            if (error != null) { return callback(error); }
            fetchedUserInfo[user_id] = userInfo;
            return callback();
          })
        )
      )(user_id);
    }

    return async.series(jobs, function(err) {
      if (err != null) { return callback(err); }
      return callback(null, fetchedUserInfo);
    });
  },

  fillUserInfo(updates, callback) {
    let update, user_id;
    if (callback == null) { callback = function(error, updates) {}; }
    const users = {};
    for (update of Array.from(updates)) {
      ({ user_id } = update.meta);
      if (UpdatesManager._validUserId(user_id)) {
        users[user_id] = true;
      }
    }

    return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
      if (error != null) { return callback(error); }
      for (update of Array.from(updates)) {
        ({ user_id } = update.meta);
        delete update.meta.user_id;
        if (UpdatesManager._validUserId(user_id)) {
          update.meta.user = fetchedUserInfo[user_id];
        }
      }
      return callback(null, updates);
    });
  },
  fillSummarizedUserInfo(updates, callback) {
    let update, user_id, user_ids;
    if (callback == null) { callback = function(error, updates) {}; }
    const users = {};
    for (update of Array.from(updates)) {
      user_ids = update.meta.user_ids || [];
      for (user_id of Array.from(user_ids)) {
        if (UpdatesManager._validUserId(user_id)) {
          users[user_id] = true;
        }
      }
    }

    return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
      if (error != null) { return callback(error); }
      for (update of Array.from(updates)) {
        user_ids = update.meta.user_ids || [];
        update.meta.users = [];
        delete update.meta.user_ids;
        for (user_id of Array.from(user_ids)) {
          if (UpdatesManager._validUserId(user_id)) {
            update.meta.users.push(fetchedUserInfo[user_id]);
          } else {
            update.meta.users.push(null);
          }
        }
      }
      return callback(null, updates);
    });
  },

  _validUserId(user_id) {
    if ((user_id == null)) {
      return false;
    } else {
      return !!user_id.match(/^[a-f0-9]{24}$/);
    }
  },
  TIME_BETWEEN_DISTINCT_UPDATES: (fiveMinutes = 5 * 60 * 1000),
  SPLIT_ON_DELETE_SIZE: 16, // characters
  _summarizeUpdates(updates, existingSummarizedUpdates) {
    if (existingSummarizedUpdates == null) { existingSummarizedUpdates = []; }
    const summarizedUpdates = existingSummarizedUpdates.slice();
    let previousUpdateWasBigDelete = false;
    for (let update of Array.from(updates)) {
      var doc_id;
      const earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1];
      let shouldConcat = false;

      // If a user inserts some text, then deletes a big chunk including that text,
      // the update we show might concat the insert and delete, and there will be no sign
      // of that insert having happened, or be able to restore to it (restoring after a big delete is common).
      // So, we split the summary on 'big' deletes. However, we're stepping backwards in time with
      // most recent changes considered first, so if this update is a big delete, we want to start
      // a new summarized update next time, hence we monitor the previous update.
      if (previousUpdateWasBigDelete) {
        shouldConcat = false;
      } else if (earliestUpdate && ((earliestUpdate.meta.end_ts - update.meta.start_ts) < this.TIME_BETWEEN_DISTINCT_UPDATES)) {
        // We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before
        // the end of the current summarized block, so no block spans more than 5 minutes.
        shouldConcat = true;
      }

      let isBigDelete = false;
      for (let op of Array.from(update.op || [])) {
        if ((op.d != null) && (op.d.length > this.SPLIT_ON_DELETE_SIZE)) {
          isBigDelete = true;
        }
      }

      previousUpdateWasBigDelete = isBigDelete;

      if (shouldConcat) {
        // check if the user in this update is already present in the earliest update,
        // if not, add them to the users list of the earliest update
        earliestUpdate.meta.user_ids = _.union(earliestUpdate.meta.user_ids, [update.meta.user_id]);

        doc_id = update.doc_id.toString();
        const doc = earliestUpdate.docs[doc_id];
        if (doc != null) {
          doc.fromV = Math.min(doc.fromV, update.v);
          doc.toV = Math.max(doc.toV, update.v);
        } else {
          earliestUpdate.docs[doc_id] = {
            fromV: update.v,
            toV: update.v
          };
        }

        earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts);
        earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts);
      } else {
        const newUpdate = {
          meta: {
            user_ids: [],
            start_ts: update.meta.start_ts,
            end_ts: update.meta.end_ts
          },
          docs: {}
        };

        newUpdate.docs[update.doc_id.toString()] = {
          fromV: update.v,
          toV: update.v
        };
        newUpdate.meta.user_ids.push(update.meta.user_id);
        summarizedUpdates.push(newUpdate);
      }
    }
    return summarizedUpdates;
  }
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
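
An illustrative sketch, not from the diff, of how the converted module's pagination might be driven (the require path, project id and helper name are placeholders): getSummarizedProjectUpdates takes an optional `before` timestamp and passes a third callback argument only while the pack iterator still has more data.

const UpdatesManager = require("./UpdatesManager");

// Fetch one page of summarized history; pass the previous call's
// nextBeforeTimestamp as `before` to continue paging backwards in time.
function fetchSummariesPage(project_id, before, callback) {
  UpdatesManager.getSummarizedProjectUpdates(project_id, { before, min_count: 25 }, function(error, updates, nextBeforeTimestamp) {
    if (error != null) { return callback(error); }
    // nextBeforeTimestamp is undefined once the iterator is exhausted
    return callback(null, { updates, nextBeforeTimestamp });
  });
}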


@ -1,69 +1,99 @@
/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let WebApiManager;
const request = require("requestretry"); // allow retry on error https://github.com/FGRibreau/node-request-retry
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");

// Don't let HTTP calls hang for a long time
const MAX_HTTP_REQUEST_LENGTH = 15000; // 15 seconds

// DEPRECATED! This method of getting user details via track-changes is deprecated
// in the way we lay out our services.
// Instead, web should be responsible for collecting the raw data (user_ids) and
// filling it out with calls to other services. All API calls should create a
// tree-like structure as much as possible, with web as the root.
module.exports = (WebApiManager = {
  sendRequest(url, callback) {
    if (callback == null) { callback = function(error, body) {}; }
    return request.get({
      url: `${Settings.apis.web.url}${url}`,
      timeout: MAX_HTTP_REQUEST_LENGTH,
      maxAttempts: 2, // for node-request-retry
      auth: {
        user: Settings.apis.web.user,
        pass: Settings.apis.web.pass,
        sendImmediately: true
      }
    }, function(error, res, body){
      if (error != null) {
        return callback(error);
      }
      if (res.statusCode === 404) {
        logger.log({url}, "got 404 from web api");
        return callback(null, null);
      }
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        return callback(null, body);
      } else {
        error = new Error(`web returned a non-success status code: ${res.statusCode} (attempts: ${res.attempts})`);
        return callback(error);
      }
    });
  },

  getUserInfo(user_id, callback) {
    if (callback == null) { callback = function(error, userInfo) {}; }
    const url = `/user/${user_id}/personal_info`;
    logger.log({user_id}, "getting user info from web");
    return WebApiManager.sendRequest(url, function(error, body) {
      let user;
      if (error != null) {
        logger.error({err: error, user_id, url}, "error accessing web");
        return callback(error);
      }

      if (body === null) {
        logger.error({user_id, url}, "no user found");
        return callback(null, null);
      }
      try {
        user = JSON.parse(body);
      } catch (error1) {
        error = error1;
        return callback(error);
      }
      return callback(null, {
        id: user.id,
        email: user.email,
        first_name: user.first_name,
        last_name: user.last_name
      });
    });
  },

  getProjectDetails(project_id, callback) {
    if (callback == null) { callback = function(error, details) {}; }
    const url = `/project/${project_id}/details`;
    logger.log({project_id}, "getting project details from web");
    return WebApiManager.sendRequest(url, function(error, body) {
      let project;
      if (error != null) {
        logger.error({err: error, project_id, url}, "error accessing web");
        return callback(error);
      }

      try {
        project = JSON.parse(body);
      } catch (error1) {
        error = error1;
        return callback(error);
      }
      return callback(null, project);
    });
  }
});
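
An illustrative sketch, not from the diff, of calling the converted WebApiManager (the require path and user id are placeholders): getUserInfo yields null when web returns a 404, otherwise an object with id, email, first_name and last_name.

const WebApiManager = require("./WebApiManager");

WebApiManager.getUserInfo("507f1f77bcf86cd799439011", function(error, userInfo) {
  if (error != null) { throw error; }
  if (userInfo == null) {
    console.log("user not found"); // web responded with 404
  } else {
    console.log(`${userInfo.first_name} ${userInfo.last_name} <${userInfo.email}>`);
  }
});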


@ -1,9 +1,10 @@
const Settings = require("settings-sharelatex");
const mongojs = require("mongojs");
const bson = require("bson");
const db = mongojs(Settings.mongo.url, ["docHistory", "projectHistoryMetaData", "docHistoryIndex"]);

module.exports = {
  db,
  ObjectId: mongojs.ObjectId,
  BSON: new bson.BSONPure()
};
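
An illustrative sketch, not from the diff, of how the exported handles might be used elsewhere in the service (the require path, collection query and doc id are assumptions): mongojs proxies the collections listed above with a MongoDB-shell-style callback API.

const { db, ObjectId } = require("./mongojs");

// Count the history packs stored for a single document
db.docHistory.count({ doc_id: ObjectId("507f1f77bcf86cd799439011") }, function(error, count) {
  if (error != null) { throw error; }
  console.log("history packs for doc:", count);
});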