decaffeinate: Convert DiffGenerator.coffee and 17 other files to JS

decaffeinate 2020-02-17 18:34:04 +01:00 committed by mserranom
parent a971c5895b
commit 57345632e0
18 changed files with 2834 additions and 2000 deletions

View file

@@ -1,227 +1,293 @@

/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DiffGenerator;
var ConsistencyError = function(message) {
  const error = new Error(message);
  error.name = "ConsistencyError";
  error.__proto__ = ConsistencyError.prototype;
  return error;
};
ConsistencyError.prototype.__proto__ = Error.prototype;

const logger = require("logger-sharelatex");

module.exports = (DiffGenerator = {
  ConsistencyError,

  rewindUpdate(content, update) {
    for (let j = update.op.length - 1, i = j; j >= 0; j--, i = j) {
      const op = update.op[i];
      if (op.broken !== true) {
        try {
          content = DiffGenerator.rewindOp(content, op);
        } catch (e) {
          if (e instanceof ConsistencyError && (i = update.op.length - 1)) {
            // catch known case where the last op in an array has been
            // merged into a later op
            logger.error({err: e, update, op: JSON.stringify(op)}, "marking op as broken");
            op.broken = true;
          } else {
            throw e; // rethrow the execption
          }
        }
      }
    }
    return content;
  },

  rewindOp(content, op) {
    let p;
    if (op.i != null) {
      // ShareJS will accept an op where p > content.length when applied,
      // and it applies as though p == content.length. However, the op is
      // passed to us with the original p > content.length. Detect if that
      // is the case with this op, and shift p back appropriately to match
      // ShareJS if so.
      ({ p } = op);
      const max_p = content.length - op.i.length;
      if (p > max_p) {
        logger.warn({max_p, p}, "truncating position to content length");
        p = max_p;
      }

      const textToBeRemoved = content.slice(p, p + op.i.length);
      if (op.i !== textToBeRemoved) {
        throw new ConsistencyError(
          `Inserted content, '${op.i}', does not match text to be removed, '${textToBeRemoved}'`
        );
      }

      return content.slice(0, p) + content.slice(p + op.i.length);

    } else if (op.d != null) {
      return content.slice(0, op.p) + op.d + content.slice(op.p);

    } else {
      return content;
    }
  },

  rewindUpdates(content, updates) {
    for (let update of Array.from(updates.reverse())) {
      try {
        content = DiffGenerator.rewindUpdate(content, update);
      } catch (e) {
        e.attempted_update = update; // keep a record of the attempted update
        throw e; // rethrow the exception
      }
    }
    return content;
  },

  buildDiff(initialContent, updates) {
    let diff = [ {u: initialContent} ];
    for (let update of Array.from(updates)) {
      diff = DiffGenerator.applyUpdateToDiff(diff, update);
    }
    diff = DiffGenerator.compressDiff(diff);
    return diff;
  },

  compressDiff(diff) {
    const newDiff = [];
    for (let part of Array.from(diff)) {
      const lastPart = newDiff[newDiff.length - 1];
      if ((lastPart != null) && ((lastPart.meta != null ? lastPart.meta.user : undefined) != null) && ((part.meta != null ? part.meta.user : undefined) != null)) {
        if ((lastPart.i != null) && (part.i != null) && (lastPart.meta.user.id === part.meta.user.id)) {
          lastPart.i += part.i;
          lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
          lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
        } else if ((lastPart.d != null) && (part.d != null) && (lastPart.meta.user.id === part.meta.user.id)) {
          lastPart.d += part.d;
          lastPart.meta.start_ts = Math.min(lastPart.meta.start_ts, part.meta.start_ts);
          lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts);
        } else {
          newDiff.push(part);
        }
      } else {
        newDiff.push(part);
      }
    }
    return newDiff;
  },

  applyOpToDiff(diff, op, meta) {
    let consumedDiff;
    const position = 0;

    let remainingDiff = diff.slice();
    ({consumedDiff, remainingDiff} = DiffGenerator._consumeToOffset(remainingDiff, op.p));
    const newDiff = consumedDiff;

    if (op.i != null) {
      newDiff.push({
        i: op.i,
        meta
      });
    } else if (op.d != null) {
      ({consumedDiff, remainingDiff} = DiffGenerator._consumeDiffAffectedByDeleteOp(remainingDiff, op, meta));
      newDiff.push(...Array.from(consumedDiff || []));
    }

    newDiff.push(...Array.from(remainingDiff || []));

    return newDiff;
  },

  applyUpdateToDiff(diff, update) {
    for (let op of Array.from(update.op)) {
      if (op.broken !== true) {
        diff = DiffGenerator.applyOpToDiff(diff, op, update.meta);
      }
    }
    return diff;
  },

  _consumeToOffset(remainingDiff, totalOffset) {
    let part;
    const consumedDiff = [];
    let position = 0;
    while ((part = remainingDiff.shift())) {
      const length = DiffGenerator._getLengthOfDiffPart(part);
      if (part.d != null) {
        consumedDiff.push(part);
      } else if ((position + length) >= totalOffset) {
        const partOffset = totalOffset - position;
        if (partOffset > 0) {
          consumedDiff.push(DiffGenerator._slicePart(part, 0, partOffset));
        }
        if (partOffset < length) {
          remainingDiff.unshift(DiffGenerator._slicePart(part, partOffset));
        }
        break;
      } else {
        position += length;
        consumedDiff.push(part);
      }
    }

    return {
      consumedDiff,
      remainingDiff
    };
  },

  _consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
    const consumedDiff = [];
    let remainingOp = deleteOp;
    while (remainingOp && (remainingDiff.length > 0)) {
      let newPart;
      ({newPart, remainingDiff, remainingOp} = DiffGenerator._consumeDeletedPart(remainingDiff, remainingOp, meta));
      if (newPart != null) { consumedDiff.push(newPart); }
    }
    return {
      consumedDiff,
      remainingDiff
    };
  },

  _consumeDeletedPart(remainingDiff, op, meta) {
    let deletedContent, newPart, remainingOp;
    const part = remainingDiff.shift();
    const partLength = DiffGenerator._getLengthOfDiffPart(part);

    if (part.d != null) {
      // Skip existing deletes
      remainingOp = op;
      newPart = part;

    } else if (partLength > op.d.length) {
      // Only the first bit of the part has been deleted
      const remainingPart = DiffGenerator._slicePart(part, op.d.length);
      remainingDiff.unshift(remainingPart);

      deletedContent = DiffGenerator._getContentOfPart(part).slice(0, op.d.length);
      if (deletedContent !== op.d) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
      }

      if (part.u != null) {
        newPart = {
          d: op.d,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp = null;

    } else if (partLength === op.d.length) {
      // The entire part has been deleted, but it is the last part

      deletedContent = DiffGenerator._getContentOfPart(part);
      if (deletedContent !== op.d) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`);
      }

      if (part.u != null) {
        newPart = {
          d: op.d,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp = null;

    } else if (partLength < op.d.length) {
      // The entire part has been deleted and there is more

      deletedContent = DiffGenerator._getContentOfPart(part);
      const opContent = op.d.slice(0, deletedContent.length);
      if (deletedContent !== opContent) {
        throw new ConsistencyError(`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`);
      }

      if (part.u) {
        newPart = {
          d: part.u,
          meta
        };
      } else if (part.i != null) {
        newPart = null;
      }

      remainingOp =
        {p: op.p, d: op.d.slice(DiffGenerator._getLengthOfDiffPart(part))};
    }

    return {
      newPart,
      remainingDiff,
      remainingOp
    };
  },

  _slicePart(basePart, from, to) {
    let part;
    if (basePart.u != null) {
      part = { u: basePart.u.slice(from, to) };
    } else if (basePart.i != null) {
      part = { i: basePart.i.slice(from, to) };
    }
    if (basePart.meta != null) {
      part.meta = basePart.meta;
    }
    return part;
  },

  _getLengthOfDiffPart(part) {
    return (part.u || part.d || part.i || '').length;
  },

  _getContentOfPart(part) {
    return part.u || part.d || part.i || '';
  }
});
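
For orientation, a minimal sketch of how the converted DiffGenerator API can be exercised. The document content, op, and meta values below are made-up examples, and the module path is assumed to match the require calls used elsewhere in this commit.

const DiffGenerator = require("./DiffGenerator");

// A hypothetical update inserting "bar" at position 3, with the meta shape
// (user, start_ts, end_ts) that compressDiff expects.
const update = {
  v: 1,
  op: [{ i: "bar", p: 3 }],
  meta: { user: { id: "user-1" }, start_ts: 1000, end_ts: 2000 }
};

// Forward direction: build a diff from the starting content plus updates.
const diff = DiffGenerator.buildDiff("foo", [update]);
// => [ { u: "foo" }, { i: "bar", meta: { user: { id: "user-1" }, ... } } ]

// Reverse direction: rewind the latest content back past the same updates.
const original = DiffGenerator.rewindUpdates("foobar", [update]);
// => "foo"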

View file

@@ -1,88 +1,128 @@

/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DiffManager;
const UpdatesManager = require("./UpdatesManager");
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffGenerator = require("./DiffGenerator");
const logger = require("logger-sharelatex");

module.exports = (DiffManager = {
  getLatestDocAndUpdates(project_id, doc_id, fromVersion, callback) {
    // Get updates last, since then they must be ahead and it
    // might be possible to rewind to the same version as the doc.
    if (callback == null) { callback = function(error, content, version, updates) {}; }
    return DocumentUpdaterManager.getDocument(project_id, doc_id, function(error, content, version) {
      if (error != null) { return callback(error); }
      if ((fromVersion == null)) { // If we haven't been given a version, just return lastest doc and no updates
        return callback(null, content, version, []);
      }
      return UpdatesManager.getDocUpdatesWithUserInfo(project_id, doc_id, {from: fromVersion}, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, content, version, updates);
      });
    });
  },

  getDiff(project_id, doc_id, fromVersion, toVersion, callback) {
    if (callback == null) { callback = function(error, diff) {}; }
    return DiffManager.getDocumentBeforeVersion(project_id, doc_id, fromVersion, function(error, startingContent, updates) {
      let diff;
      if (error != null) {
        if (error.message === "broken-history") {
          return callback(null, "history unavailable");
        } else {
          return callback(error);
        }
      }

      const updatesToApply = [];
      for (let update of Array.from(updates.slice().reverse())) {
        if (update.v <= toVersion) {
          updatesToApply.push(update);
        }
      }

      try {
        diff = DiffGenerator.buildDiff(startingContent, updatesToApply);
      } catch (e) {
        return callback(e);
      }

      return callback(null, diff);
    });
  },

  getDocumentBeforeVersion(project_id, doc_id, version, _callback) {
    // Whichever order we get the latest document and the latest updates,
    // there is potential for updates to be applied between them so that
    // they do not return the same 'latest' versions.
    // If this happens, we just retry and hopefully get them at the compatible
    // versions.
    let retry;
    if (_callback == null) { _callback = function(error, document, rewoundUpdates) {}; }
    let retries = 3;
    const callback = function(error, ...args) {
      if (error != null) {
        if (error.retry && (retries > 0)) {
          logger.warn({error, project_id, doc_id, version, retries}, "retrying getDocumentBeforeVersion");
          return retry();
        } else {
          return _callback(error);
        }
      } else {
        return _callback(null, ...Array.from(args));
      }
    };

    return (retry = function() {
      retries--;
      return DiffManager._tryGetDocumentBeforeVersion(project_id, doc_id, version, callback);
    })();
  },

  _tryGetDocumentBeforeVersion(project_id, doc_id, version, callback) {
    if (callback == null) { callback = function(error, document, rewoundUpdates) {}; }
    logger.log({project_id, doc_id, version}, "getting document before version");
    return DiffManager.getLatestDocAndUpdates(project_id, doc_id, version, function(error, content, version, updates) {
      let startingContent;
      if (error != null) { return callback(error); }

      // bail out if we hit a broken update
      for (let u of Array.from(updates)) {
        if (u.broken) {
          return callback(new Error("broken-history"));
        }
      }

      // discard any updates which are ahead of this document version
      while ((updates[0] != null ? updates[0].v : undefined) >= version) {
        updates.shift();
      }

      const lastUpdate = updates[0];
      if ((lastUpdate != null) && (lastUpdate.v !== (version - 1))) {
        error = new Error(`latest update version, ${lastUpdate.v}, does not match doc version, ${version}`);
        error.retry = true;
        return callback(error);
      }

      logger.log({docVersion: version, lastUpdateVersion: (lastUpdate != null ? lastUpdate.v : undefined), updateCount: updates.length}, "rewinding updates");

      const tryUpdates = updates.slice().reverse();
      try {
        startingContent = DiffGenerator.rewindUpdates(content, tryUpdates);
        // tryUpdates is reversed, and any unapplied ops are marked as broken
      } catch (e) {
        return callback(e);
      }

      return callback(null, startingContent, tryUpdates);
    });
  }
});
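
As a usage sketch, the converted DiffManager is driven through getDiff, which rewinds the live document back to fromVersion and then replays updates up to toVersion. The ids and version numbers below are placeholders, not values from this commit.

const DiffManager = require("./DiffManager");

// Placeholder ids for illustration only.
const project_id = "0123456789abcdef01234567";
const doc_id = "0123456789abcdef01234568";

DiffManager.getDiff(project_id, doc_id, 5, 10, function(error, diff) {
  if (error != null) { return console.error(error); }
  // diff is an array of parts such as { u: "unchanged" }, { i: "inserted", meta }
  // or { d: "deleted", meta }, or the string "history unavailable" when the
  // history has been marked as broken.
  console.log(diff);
});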

View file

@@ -1,42 +1,63 @@

/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let DocumentUpdaterManager;
const request = require("request");
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");

module.exports = (DocumentUpdaterManager = {
  getDocument(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error, content, version) {}; }
    const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
    logger.log({project_id, doc_id}, "getting doc from document updater");
    return request.get(url, function(error, res, body){
      if (error != null) {
        return callback(error);
      }
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        try {
          body = JSON.parse(body);
        } catch (error1) {
          error = error1;
          return callback(error);
        }
        logger.log({project_id, doc_id, version: body.version}, "got doc from document updater");
        return callback(null, body.lines.join("\n"), body.version);
      } else {
        error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
        logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
        return callback(error);
      }
    });
  },

  setDocument(project_id, doc_id, content, user_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    const url = `${Settings.apis.documentupdater.url}/project/${project_id}/doc/${doc_id}`;
    logger.log({project_id, doc_id}, "setting doc in document updater");
    return request.post({
      url,
      json: {
        lines: content.split("\n"),
        source: "restore",
        user_id,
        undoing: true
      }
    }, function(error, res, body){
      if (error != null) {
        return callback(error);
      }
      if ((res.statusCode >= 200) && (res.statusCode < 300)) {
        return callback(null);
      } else {
        error = new Error(`doc updater returned a non-success status code: ${res.statusCode}`);
        logger.error({err: error, project_id, doc_id, url}, "error accessing doc updater");
        return callback(error);
      }
    });
  }
});

View file

@@ -1,46 +1,64 @@

/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
const { ObjectId } = require("mongojs");
const request = require("request");
const async = require("async");
const settings = require("settings-sharelatex");
const { port } = settings.internal.trackchanges;
const logger = require("logger-sharelatex");
const LockManager = require("./LockManager");

module.exports = {
  check(callback){
    const project_id = ObjectId(settings.trackchanges.healthCheck.project_id);
    const url = `http://localhost:${port}/project/${project_id}`;
    logger.log({project_id}, "running health check");
    const jobs = [
      cb=>
        request.get({url:`http://localhost:${port}/check_lock`, timeout:3000}, function(err, res, body) {
          if (err != null) {
            logger.err({err, project_id}, "error checking lock for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 200) {
            return cb(`status code not 200, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
      ,
      cb=>
        request.post({url:`${url}/flush`, timeout:10000}, function(err, res, body) {
          if (err != null) {
            logger.err({err, project_id}, "error flushing for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 204) {
            return cb(`status code not 204, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
      ,
      cb=>
        request.get({url:`${url}/updates`, timeout:10000}, function(err, res, body){
          if (err != null) {
            logger.err({err, project_id}, "error getting updates for health check");
            return cb(err);
          } else if ((res != null ? res.statusCode : undefined) !== 200) {
            return cb(`status code not 200, it's ${res.statusCode}`);
          } else {
            return cb();
          }
        })
    ];
    return async.series(jobs, callback);
  },
  checkLock(callback) {
    return LockManager.healthCheck(callback);
  }
};

View file

@@ -1,137 +1,195 @@

/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let HttpController;
const UpdatesManager = require("./UpdatesManager");
const DiffManager = require("./DiffManager");
const PackManager = require("./PackManager");
const RestoreManager = require("./RestoreManager");
const logger = require("logger-sharelatex");
const HealthChecker = require("./HealthChecker");
const _ = require("underscore");

module.exports = (HttpController = {
  flushDoc(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;
    logger.log({project_id, doc_id}, "compressing doc history");
    return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  flushProject(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    logger.log({project_id}, "compressing project history");
    return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  flushAll(req, res, next) {
    // limit on projects to flush or -1 for all (default)
    if (next == null) { next = function(error) {}; }
    const limit = (req.query.limit != null) ? parseInt(req.query.limit, 10) : -1;
    logger.log({limit}, "flushing all projects");
    return UpdatesManager.flushAll(limit, function(error, result) {
      if (error != null) { return next(error); }
      const {failed, succeeded, all} = result;
      const status = `${succeeded.length} succeeded, ${failed.length} failed`;
      if (limit === 0) {
        return res.status(200).send(`${status}\nwould flush:\n${all.join('\n')}\n`);
      } else if (failed.length > 0) {
        logger.log({failed, succeeded}, "error flushing projects");
        return res.status(500).send(`${status}\nfailed to flush:\n${failed.join('\n')}\n`);
      } else {
        return res.status(200).send(`${status}\nflushed ${succeeded.length} projects of ${all.length}\n`);
      }
    });
  },

  checkDanglingUpdates(req, res, next) {
    if (next == null) { next = function(error) {}; }
    logger.log("checking dangling updates");
    return UpdatesManager.getDanglingUpdates(function(error, result) {
      if (error != null) { return next(error); }
      if (result.length > 0) {
        logger.log({dangling: result}, "found dangling updates");
        return res.status(500).send(`dangling updates:\n${result.join('\n')}\n`);
      } else {
        return res.status(200).send("no dangling updates found\n");
      }
    });
  },

  checkDoc(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;
    logger.log({project_id, doc_id}, "checking doc history");
    return DiffManager.getDocumentBeforeVersion(project_id, doc_id, 1, function(error, document, rewoundUpdates) {
      if (error != null) { return next(error); }
      const broken = [];
      for (let update of Array.from(rewoundUpdates)) {
        for (let op of Array.from(update.op)) {
          if (op.broken === true) {
            broken.push(op);
          }
        }
      }
      if (broken.length > 0) {
        return res.send(broken);
      } else {
        return res.send(204);
      }
    });
  },

  getDiff(req, res, next) {
    let from, to;
    if (next == null) { next = function(error) {}; }
    const { doc_id } = req.params;
    const { project_id } = req.params;

    if (req.query.from != null) {
      from = parseInt(req.query.from, 10);
    } else {
      from = null;
    }
    if (req.query.to != null) {
      to = parseInt(req.query.to, 10);
    } else {
      to = null;
    }

    logger.log({project_id, doc_id, from, to}, "getting diff");
    return DiffManager.getDiff(project_id, doc_id, from, to, function(error, diff) {
      if (error != null) { return next(error); }
      return res.json({diff});
    });
  },

  getUpdates(req, res, next) {
    let before, min_count;
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;

    if (req.query.before != null) {
      before = parseInt(req.query.before, 10);
    }
    if (req.query.min_count != null) {
      min_count = parseInt(req.query.min_count, 10);
    }

    return UpdatesManager.getSummarizedProjectUpdates(project_id, {before, min_count}, function(error, updates, nextBeforeTimestamp) {
      if (error != null) { return next(error); }
      return res.json({
        updates,
        nextBeforeTimestamp
      });
    });
  },

  restore(req, res, next) {
    if (next == null) { next = function(error) {}; }
    let {doc_id, project_id, version} = req.params;
    const user_id = req.headers["x-user-id"];
    version = parseInt(version, 10);
    return RestoreManager.restoreToBeforeVersion(project_id, doc_id, version, user_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  pushDocHistory(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    const { doc_id } = req.params;
    logger.log({project_id, doc_id}, "pushing all finalised changes to s3");
    return PackManager.pushOldPacks(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  pullDocHistory(req, res, next) {
    if (next == null) { next = function(error) {}; }
    const { project_id } = req.params;
    const { doc_id } = req.params;
    logger.log({project_id, doc_id}, "pulling all packs from s3");
    return PackManager.pullOldPacks(project_id, doc_id, function(error) {
      if (error != null) { return next(error); }
      return res.send(204);
    });
  },

  healthCheck(req, res){
    return HealthChecker.check(function(err){
      if (err != null) {
        logger.err({err}, "error performing health check");
        return res.send(500);
      } else {
        return res.send(200);
      }
    });
  },

  checkLock(req, res){
    return HealthChecker.checkLock(function(err) {
      if (err != null) {
        logger.err({err}, "error performing lock check");
        return res.send(500);
      } else {
        return res.send(200);
      }
    });
  }
});
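
The converted controller methods keep the standard Express (req, res, next) signature, so they can be mounted directly as route handlers. The wiring below is purely illustrative; the route paths are assumptions, not taken from this commit, and the real routes are defined elsewhere in the service.

const express = require("express");
const HttpController = require("./HttpController");

const app = express();
// Illustrative paths only.
app.post("/project/:project_id/doc/:doc_id/flush", HttpController.flushDoc);
app.get("/project/:project_id/doc/:doc_id/diff", HttpController.getDiff);
app.get("/project/:project_id/updates", HttpController.getUpdates);
app.get("/health_check", HttpController.healthCheck);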

View file

@@ -1,85 +1,119 @@

/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let LockManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.lock);
const os = require("os");
const crypto = require("crypto");
const logger = require("logger-sharelatex");

const HOST = os.hostname();
const PID = process.pid;
const RND = crypto.randomBytes(4).toString('hex');
let COUNT = 0;

module.exports = (LockManager = {
  LOCK_TEST_INTERVAL: 50, // 50ms between each test of the lock
  MAX_LOCK_WAIT_TIME: 10000, // 10s maximum time to spend trying to get the lock
  LOCK_TTL: 300, // seconds (allow 5 minutes for any operation to complete)

  // Use a signed lock value as described in
  // http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
  // to prevent accidental unlocking by multiple processes
  randomLock() {
    const time = Date.now();
    return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`;
  },

  unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end',

  tryLock(key, callback) {
    if (callback == null) { callback = function(err, gotLock) {}; }
    const lockValue = LockManager.randomLock();
    return rclient.set(key, lockValue, "EX", this.LOCK_TTL, "NX", function(err, gotLock){
      if (err != null) { return callback(err); }
      if (gotLock === "OK") {
        return callback(err, true, lockValue);
      } else {
        return callback(err, false);
      }
    });
  },

  getLock(key, callback) {
    let attempt;
    if (callback == null) { callback = function(error) {}; }
    const startTime = Date.now();
    return (attempt = function() {
      if ((Date.now() - startTime) > LockManager.MAX_LOCK_WAIT_TIME) {
        const e = new Error("Timeout");
        e.key = key;
        return callback(e);
      }

      return LockManager.tryLock(key, function(error, gotLock, lockValue) {
        if (error != null) { return callback(error); }
        if (gotLock) {
          return callback(null, lockValue);
        } else {
          return setTimeout(attempt, LockManager.LOCK_TEST_INTERVAL);
        }
      });
    })();
  },

  checkLock(key, callback) {
    if (callback == null) { callback = function(err, isFree) {}; }
    return rclient.exists(key, function(err, exists) {
      if (err != null) { return callback(err); }
      exists = parseInt(exists);
      if (exists === 1) {
        return callback(err, false);
      } else {
        return callback(err, true);
      }
    });
  },

  releaseLock(key, lockValue, callback) {
    return rclient.eval(LockManager.unlockScript, 1, key, lockValue, function(err, result) {
      if (err != null) {
        return callback(err);
      }
      if ((result != null) && (result !== 1)) { // successful unlock should release exactly one key
        logger.error({key, lockValue, redis_err:err, redis_result:result}, "unlocking error");
        return callback(new Error("tried to release timed out lock"));
      }
      return callback(err,result);
    });
  },

  runWithLock(key, runner, callback) {
    if (callback == null) { callback = function(error) {}; }
    return LockManager.getLock(key, function(error, lockValue) {
      if (error != null) { return callback(error); }
      return runner(error1 =>
        LockManager.releaseLock(key, lockValue, function(error2) {
          error = error1 || error2;
          if (error != null) { return callback(error); }
          return callback();
        })
      );
    });
  },

  healthCheck(callback) {
    const action = releaseLock => releaseLock();
    return LockManager.runWithLock(`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`, action, callback);
  },

  close(callback) {
    rclient.quit();
    return rclient.once('end', callback);
  }
});
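
A short sketch of the runWithLock contract after conversion: the runner receives a releaseLock function and must call it (optionally with an error) when its work is done, after which the final callback fires. The lock key below is a made-up example; only the HealthCheck key above is taken from this commit.

const LockManager = require("./LockManager");

LockManager.runWithLock("HistoryLock:example-doc-id", function(releaseLock) {
  // ... perform the work that must not run concurrently ...
  releaseLock(); // or releaseLock(err) to propagate a failure
}, function(error) {
  if (error != null) { return console.error("lock or work failed", error); }
  console.log("work completed and lock released");
});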

View file

@@ -1,118 +1,141 @@

/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let MongoAWS;
const settings = require("settings-sharelatex");
const logger = require("logger-sharelatex");
const AWS = require('aws-sdk');
const S3S = require('s3-streams');
const {db, ObjectId} = require("./mongojs");
const JSONStream = require("JSONStream");
const ReadlineStream = require("byline");
const zlib = require("zlib");
const Metrics = require("metrics-sharelatex");

const DAYS = 24 * 3600 * 1000; // one day in milliseconds

const createStream = function(streamConstructor, project_id, doc_id, pack_id) {
  const AWS_CONFIG = {
    accessKeyId: settings.trackchanges.s3.key,
    secretAccessKey: settings.trackchanges.s3.secret,
    endpoint: settings.trackchanges.s3.endpoint,
    s3ForcePathStyle: settings.trackchanges.s3.pathStyle
  };

  return streamConstructor(new AWS.S3(AWS_CONFIG), {
    "Bucket": settings.trackchanges.stores.doc_history,
    "Key": project_id+"/changes-"+doc_id+"/pack-"+pack_id
  });
};

module.exports = (MongoAWS = {

  archivePack(project_id, doc_id, pack_id, _callback) {
    if (_callback == null) { _callback = function(error) {}; }
    const callback = function(...args) {
      _callback(...Array.from(args || []));
      return _callback = function() {};
    };

    const query = {
      _id: ObjectId(pack_id),
      doc_id: ObjectId(doc_id)
    };

    if ((project_id == null)) { return callback(new Error("invalid project id")); }
    if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
    if ((pack_id == null)) { return callback(new Error("invalid pack id")); }

    logger.log({project_id, doc_id, pack_id}, "uploading data to s3");

    const upload = createStream(S3S.WriteStream, project_id, doc_id, pack_id);

    return db.docHistory.findOne(query, function(err, result) {
      if (err != null) { return callback(err); }
      if ((result == null)) { return callback(new Error("cannot find pack to send to s3")); }
      if (result.expiresAt != null) { return callback(new Error("refusing to send pack with TTL to s3")); }
      const uncompressedData = JSON.stringify(result);
      if (uncompressedData.indexOf("\u0000") !== -1) {
        const error = new Error("null bytes found in upload");
        logger.error({err: error, project_id, doc_id, pack_id}, error.message);
        return callback(error);
      }
      return zlib.gzip(uncompressedData, function(err, buf) {
        logger.log({project_id, doc_id, pack_id, origSize: uncompressedData.length, newSize: buf.length}, "compressed pack");
        if (err != null) { return callback(err); }
        upload.on('error', err => callback(err));
        upload.on('finish', function() {
          Metrics.inc("archive-pack");
          logger.log({project_id, doc_id, pack_id}, "upload to s3 completed");
          return callback(null);
        });
        upload.write(buf);
        return upload.end();
      });
    });
  },

  readArchivedPack(project_id, doc_id, pack_id, _callback) {
    if (_callback == null) { _callback = function(error, result) {}; }
    const callback = function(...args) {
      _callback(...Array.from(args || []));
      return _callback = function() {};
    };

    if ((project_id == null)) { return callback(new Error("invalid project id")); }
    if ((doc_id == null)) { return callback(new Error("invalid doc id")); }
    if ((pack_id == null)) { return callback(new Error("invalid pack id")); }

    logger.log({project_id, doc_id, pack_id}, "downloading data from s3");

    const download = createStream(S3S.ReadStream, project_id, doc_id, pack_id);

    const inputStream = download
      .on('open', obj => 1).on('error', err => callback(err));

    const gunzip = zlib.createGunzip();
    gunzip.setEncoding('utf8');
    gunzip.on('error', function(err) {
      logger.log({project_id, doc_id, pack_id, err}, "error uncompressing gzip stream");
      return callback(err);
    });

    const outputStream = inputStream.pipe(gunzip);
    const parts = [];
    outputStream.on('error', err => callback(err));
    outputStream.on('end', function() {
      let object;
      logger.log({project_id, doc_id, pack_id}, "download from s3 completed");
      try {
        object = JSON.parse(parts.join(''));
      } catch (e) {
        return callback(e);
      }
      object._id = ObjectId(object._id);
      object.doc_id = ObjectId(object.doc_id);
      object.project_id = ObjectId(object.project_id);
      for (let op of Array.from(object.pack)) {
        if (op._id != null) { op._id = ObjectId(op._id); }
      }
      return callback(null, object);
    });
    return outputStream.on('data', data => parts.push(data));
  },

  unArchivePack(project_id, doc_id, pack_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return MongoAWS.readArchivedPack(project_id, doc_id, pack_id, function(err, object) {
      if (err != null) { return callback(err); }
      Metrics.inc("unarchive-pack");
      // allow the object to expire, we can always retrieve it again
      object.expiresAt = new Date(Date.now() + (7 * DAYS));
      logger.log({project_id, doc_id, pack_id}, "inserting object from s3");
      return db.docHistory.insert(object, callback);
    });
  }
});
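
For orientation, the archive and unarchive calls above form a round trip between the docHistory collection and S3. The ids below are placeholders, and the sketch assumes the settings.trackchanges.s3 configuration used by createStream is present.

const MongoAWS = require("./MongoAWS");

// Placeholder ObjectId strings for illustration only.
const project_id = "0123456789abcdef01234567";
const doc_id = "0123456789abcdef01234568";
const pack_id = "0123456789abcdef01234569";

// Push a finalised pack from Mongo to S3, then pull it back with a 7-day TTL.
MongoAWS.archivePack(project_id, doc_id, pack_id, function(error) {
  if (error != null) { return console.error(error); }
  MongoAWS.unArchivePack(project_id, doc_id, pack_id, function(error) {
    if (error != null) { return console.error(error); }
    console.log("pack restored to docHistory with expiresAt set");
  });
});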

View file

@@ -1,104 +1,131 @@

/*
 * decaffeinate suggestions:
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let MongoManager;
const {db, ObjectId} = require("./mongojs");
const PackManager = require("./PackManager");
const async = require("async");
const _ = require("underscore");
const metrics = require('metrics-sharelatex');
const logger = require('logger-sharelatex');

module.exports = (MongoManager = {
  getLastCompressedUpdate(doc_id, callback) {
    if (callback == null) { callback = function(error, update) {}; }
    return db.docHistory
      .find({doc_id: ObjectId(doc_id.toString())}, {pack: {$slice:-1}}) // only return the last entry in a pack
      .sort({ v: -1 })
      .limit(1)
      .toArray(function(error, compressedUpdates) {
        if (error != null) { return callback(error); }
        return callback(null, compressedUpdates[0] || null);
      });
  },

  peekLastCompressedUpdate(doc_id, callback) {
    // under normal use we pass back the last update as
    // callback(null,update,version).
    //
    // when we have an existing last update but want to force a new one
    // to start, we pass it back as callback(null,null,version), just
    // giving the version so we can check consistency.
    if (callback == null) { callback = function(error, update, version) {}; }
    return MongoManager.getLastCompressedUpdate(doc_id, function(error, update) {
      if (error != null) { return callback(error); }
      if (update != null) {
        if (update.broken) { // marked as broken so we will force a new op
          return callback(null, null);
        } else if (update.pack != null) {
          if (update.finalised) { // no more ops can be appended
            return callback(null, null, update.pack[0] != null ? update.pack[0].v : undefined);
          } else {
            return callback(null, update, update.pack[0] != null ? update.pack[0].v : undefined);
          }
        } else {
          return callback(null, update, update.v);
        }
      } else {
        return PackManager.getLastPackFromIndex(doc_id, function(error, pack) {
          if (error != null) { return callback(error); }
          if (((pack != null ? pack.inS3 : undefined) != null) && ((pack != null ? pack.v_end : undefined) != null)) { return callback(null, null, pack.v_end); }
          return callback(null, null);
        });
      }
    });
  },

  backportProjectId(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return db.docHistory.update({
      doc_id: ObjectId(doc_id.toString()),
      project_id: { $exists: false }
    }, {
      $set: { project_id: ObjectId(project_id.toString()) }
    }, {
      multi: true
    }, callback);
  },

  getProjectMetaData(project_id, callback) {
    if (callback == null) { callback = function(error, metadata) {}; }
    return db.projectHistoryMetaData.find({
      project_id: ObjectId(project_id.toString())
    }, function(error, results) {
      if (error != null) { return callback(error); }
      return callback(null, results[0]);
    });
  },

  setProjectMetaData(project_id, metadata, callback) {
    if (callback == null) { callback = function(error) {}; }
    return db.projectHistoryMetaData.update({
      project_id: ObjectId(project_id)
    }, {
      $set: metadata
    }, {
      upsert: true
    }, callback);
  },

  upgradeHistory(project_id, callback) {
    // preserve the project's existing history
    if (callback == null) { callback = function(error) {}; }
    return db.docHistory.update({
      project_id: ObjectId(project_id),
      temporary: true,
      expiresAt: {$exists: true}
    }, {
      $set: {temporary: false},
      $unset: {expiresAt: ""}
    }, {
      multi: true
    }, callback);
  },

  ensureIndices() {
    // For finding all updates that go into a diff for a doc
    db.docHistory.ensureIndex({ doc_id: 1, v: 1 }, { background: true });
    // For finding all updates that affect a project
    db.docHistory.ensureIndex({ project_id: 1, "meta.end_ts": 1 }, { background: true });
# For finding updates that don't yet have a project_id and need it inserting // For finding updates that don't yet have a project_id and need it inserting
db.docHistory.ensureIndex { doc_id: 1, project_id: 1 }, { background: true } db.docHistory.ensureIndex({ doc_id: 1, project_id: 1 }, { background: true });
# For finding project meta-data // For finding project meta-data
db.projectHistoryMetaData.ensureIndex { project_id: 1 }, { background: true } db.projectHistoryMetaData.ensureIndex({ project_id: 1 }, { background: true });
# TTL index for auto deleting week old temporary ops // TTL index for auto deleting week old temporary ops
db.docHistory.ensureIndex { expiresAt: 1 }, { expireAfterSeconds: 0, background: true } db.docHistory.ensureIndex({ expiresAt: 1 }, { expireAfterSeconds: 0, background: true });
# For finding packs to be checked for archiving // For finding packs to be checked for archiving
db.docHistory.ensureIndex { last_checked: 1 }, { background: true } db.docHistory.ensureIndex({ last_checked: 1 }, { background: true });
# For finding archived packs // For finding archived packs
db.docHistoryIndex.ensureIndex { project_id: 1 }, { background: true } return db.docHistoryIndex.ensureIndex({ project_id: 1 }, { background: true });
}
});
[ [
'getLastCompressedUpdate', 'getLastCompressedUpdate',
'getProjectMetaData', 'getProjectMetaData',
'setProjectMetaData' 'setProjectMetaData'
].map (method) -> ].map(method => metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger));
metrics.timeAsyncMethod(MongoManager, method, 'mongo.MongoManager', logger)
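A hypothetical caller of peekLastCompressedUpdate above, illustrating the three shapes of the callback described in its comments; the require path and the 24-character doc id are placeholders for illustration only:

const MongoManager = require("./MongoManager");

// "aaaaaaaaaaaaaaaaaaaaaaaa" is a placeholder 24-character doc id
MongoManager.peekLastCompressedUpdate("aaaaaaaaaaaaaaaaaaaaaaaa", function(error, update, version) {
  if (error != null) { return console.error(error); }
  if (update != null) {
    console.log("last pack can still be appended to, at version", version);
  } else if (version != null) {
    console.log("must start a new pack; last known version is", version);
  } else {
    console.log("no version to check against (no history yet, or the last update was marked broken)");
  }
});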

File diff suppressed because it is too large

View file

@ -1,139 +1,183 @@
Settings = require "settings-sharelatex" /*
async = require "async" * decaffeinate suggestions:
_ = require "underscore" * DS101: Remove unnecessary use of Array.from
{db, ObjectId, BSON} = require "./mongojs" * DS102: Remove unnecessary code created because of implicit returns
fs = require "fs" * DS103: Rewrite code to no longer use __guard__
Metrics = require "metrics-sharelatex" * DS205: Consider reworking code to avoid use of IIFEs
Metrics.initialize("track-changes") * DS207: Consider shorter variations of null checks
logger = require "logger-sharelatex" * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
logger.initialize("track-changes-packworker") */
if Settings.sentry?.dsn? let LIMIT, pending;
logger.initializeErrorReporting(Settings.sentry.dsn) let project_id, doc_id;
const Settings = require("settings-sharelatex");
const async = require("async");
const _ = require("underscore");
const {db, ObjectId, BSON} = require("./mongojs");
const fs = require("fs");
const Metrics = require("metrics-sharelatex");
Metrics.initialize("track-changes");
const logger = require("logger-sharelatex");
logger.initialize("track-changes-packworker");
if ((Settings.sentry != null ? Settings.sentry.dsn : undefined) != null) {
logger.initializeErrorReporting(Settings.sentry.dsn);
}
DAYS = 24 * 3600 * 1000 const DAYS = 24 * 3600 * 1000;
LockManager = require "./LockManager" const LockManager = require("./LockManager");
PackManager = require "./PackManager" const PackManager = require("./PackManager");
# this worker script is forked by the main process to look for // this worker script is forked by the main process to look for
# document histories which can be archived // document histories which can be archived
source = process.argv[2] const source = process.argv[2];
DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000 const DOCUMENT_PACK_DELAY = Number(process.argv[3]) || 1000;
TIMEOUT = Number(process.argv[4]) || 30*60*1000 const TIMEOUT = Number(process.argv[4]) || (30*60*1000);
COUNT = 0 # number processed let COUNT = 0; // number processed
TOTAL = 0 # total number to process let TOTAL = 0; // total number to process
if !source.match(/^[0-9]+$/) if (!source.match(/^[0-9]+$/)) {
file = fs.readFileSync source const file = fs.readFileSync(source);
result = for line in file.toString().split('\n') const result = (() => {
[project_id, doc_id] = line.split(' ') const result1 = [];
{doc_id, project_id} for (let line of Array.from(file.toString().split('\n'))) {
pending = _.filter result, (row) -> row?.doc_id?.match(/^[a-f0-9]{24}$/) [project_id, doc_id] = Array.from(line.split(' '));
else result1.push({doc_id, project_id});
LIMIT = Number(process.argv[2]) || 1000 }
return result1;
})();
pending = _.filter(result, row => __guard__(row != null ? row.doc_id : undefined, x => x.match(/^[a-f0-9]{24}$/)));
} else {
LIMIT = Number(process.argv[2]) || 1000;
}
shutDownRequested = false let shutDownRequested = false;
shutDownTimer = setTimeout () -> const shutDownTimer = setTimeout(function() {
logger.log "pack timed out, requesting shutdown" logger.log("pack timed out, requesting shutdown");
# start the shutdown on the next pack // start the shutdown on the next pack
shutDownRequested = true shutDownRequested = true;
# do a hard shutdown after a further 5 minutes // do a hard shutdown after a further 5 minutes
hardTimeout = setTimeout () -> const hardTimeout = setTimeout(function() {
logger.error "HARD TIMEOUT in pack archive worker" logger.error("HARD TIMEOUT in pack archive worker");
process.exit() return process.exit();
, 5*60*1000 }
hardTimeout.unref() , 5*60*1000);
, TIMEOUT return hardTimeout.unref();
}
, TIMEOUT);
logger.log "checking for updates, limit=#{LIMIT}, delay=#{DOCUMENT_PACK_DELAY}, timeout=#{TIMEOUT}" logger.log(`checking for updates, limit=${LIMIT}, delay=${DOCUMENT_PACK_DELAY}, timeout=${TIMEOUT}`);
# workaround for https://github.com/mafintosh/mongojs/issues/224 // workaround for https://github.com/mafintosh/mongojs/issues/224
db.close = (callback) -> db.close = function(callback) {
this._getServer (err, server) -> return this._getServer(function(err, server) {
return callback(err) if err? if (err != null) { return callback(err); }
server = if server.destroy? then server else server.topology server = (server.destroy != null) ? server : server.topology;
server.destroy(true, true) server.destroy(true, true);
callback() return callback();
});
};
finish = () -> const finish = function() {
if shutDownTimer? if (shutDownTimer != null) {
logger.log 'cancelling timeout' logger.log('cancelling timeout');
clearTimeout shutDownTimer clearTimeout(shutDownTimer);
logger.log 'closing db' }
db.close () -> logger.log('closing db');
logger.log 'closing LockManager Redis Connection' return db.close(function() {
LockManager.close () -> logger.log('closing LockManager Redis Connection');
logger.log {processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker' return LockManager.close(function() {
hardTimeout = setTimeout () -> logger.log({processedCount: COUNT, allCount: TOTAL}, 'ready to exit from pack archive worker');
logger.error 'hard exit from pack archive worker' const hardTimeout = setTimeout(function() {
process.exit(1) logger.error('hard exit from pack archive worker');
, 5*1000 return process.exit(1);
hardTimeout.unref() }
, 5*1000);
return hardTimeout.unref();
});
});
};
process.on 'exit', (code) -> process.on('exit', code => logger.log({code}, 'pack archive worker exited'));
logger.log {code}, 'pack archive worker exited'
processUpdates = (pending) -> const processUpdates = pending =>
async.eachSeries pending, (result, callback) -> async.eachSeries(pending, function(result, callback) {
{_id, project_id, doc_id} = result let _id;
COUNT++ ({_id, project_id, doc_id} = result);
logger.log {project_id, doc_id}, "processing #{COUNT}/#{TOTAL}" COUNT++;
if not project_id? or not doc_id? logger.log({project_id, doc_id}, `processing ${COUNT}/${TOTAL}`);
logger.log {project_id, doc_id}, "skipping pack, missing project/doc id" if ((project_id == null) || (doc_id == null)) {
return callback() logger.log({project_id, doc_id}, "skipping pack, missing project/doc id");
handler = (err, result) -> return callback();
if err? and err.code is "InternalError" and err.retryable }
logger.warn {err, result}, "ignoring S3 error in pack archive worker" const handler = function(err, result) {
# Ignore any s3 errors due to random problems if ((err != null) && (err.code === "InternalError") && err.retryable) {
err = null logger.warn({err, result}, "ignoring S3 error in pack archive worker");
if err? // Ignore any s3 errors due to random problems
logger.error {err, result}, "error in pack archive worker" err = null;
return callback(err) }
if shutDownRequested if (err != null) {
logger.warn "shutting down pack archive worker" logger.error({err, result}, "error in pack archive worker");
return callback(new Error("shutdown")) return callback(err);
setTimeout () -> }
callback(err, result) if (shutDownRequested) {
, DOCUMENT_PACK_DELAY logger.warn("shutting down pack archive worker");
if not _id? return callback(new Error("shutdown"));
PackManager.pushOldPacks project_id, doc_id, handler }
else return setTimeout(() => callback(err, result)
PackManager.processOldPack project_id, doc_id, _id, handler , DOCUMENT_PACK_DELAY);
, (err, results) -> };
if err? and err.message != "shutdown" if ((_id == null)) {
logger.error {err}, 'error in pack archive worker processUpdates' return PackManager.pushOldPacks(project_id, doc_id, handler);
finish() } else {
return PackManager.processOldPack(project_id, doc_id, _id, handler);
}
}
, function(err, results) {
if ((err != null) && (err.message !== "shutdown")) {
logger.error({err}, 'error in pack archive worker processUpdates');
}
return finish();
})
;
# find the packs which can be archived // find the packs which can be archived
ObjectIdFromDate = (date) -> const ObjectIdFromDate = function(date) {
id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000"; const id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000";
return ObjectId(id) return ObjectId(id);
};
# new approach, two passes // new approach, two passes
# find packs to be marked as finalised:true, those which have a newer pack present // find packs to be marked as finalised:true, those which have a newer pack present
# then only consider finalised:true packs for archiving // then only consider finalised:true packs for archiving
if pending? if (pending != null) {
logger.log "got #{pending.length} entries from #{source}" logger.log(`got ${pending.length} entries from ${source}`);
processUpdates pending processUpdates(pending);
else } else {
oneWeekAgo = new Date(Date.now() - 7 * DAYS) const oneWeekAgo = new Date(Date.now() - (7 * DAYS));
db.docHistory.find({ db.docHistory.find({
expiresAt: {$exists: false} expiresAt: {$exists: false},
project_id: {$exists: true} project_id: {$exists: true},
v_end: {$exists: true} v_end: {$exists: true},
_id: {$lt: ObjectIdFromDate(oneWeekAgo)} _id: {$lt: ObjectIdFromDate(oneWeekAgo)},
last_checked: {$lt: oneWeekAgo} last_checked: {$lt: oneWeekAgo}
}, {_id:1, doc_id:1, project_id:1}).sort({ }, {_id:1, doc_id:1, project_id:1}).sort({
last_checked:1 last_checked:1
}).limit LIMIT, (err, results) -> }).limit(LIMIT, function(err, results) {
if err? if (err != null) {
logger.log {err}, 'error checking for updates' logger.log({err}, 'error checking for updates');
finish() finish();
return return;
pending = _.uniq results, false, (result) -> result.doc_id.toString() }
TOTAL = pending.length pending = _.uniq(results, false, result => result.doc_id.toString());
logger.log "found #{TOTAL} documents to archive" TOTAL = pending.length;
processUpdates pending logger.log(`found ${TOTAL} documents to archive`);
return processUpdates(pending);
});
}
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
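A standalone worked example of the ObjectIdFromDate helper above: the leading four bytes of a MongoDB ObjectId encode its creation time in seconds, so hex-encoding a timestamp and padding with sixteen zeros produces the smallest id for that instant, usable as an $lt boundary. The require path mirrors the one at the top of this worker; the sample date in the comment is illustrative.

const { ObjectId } = require("./mongojs");
const DAYS = 24 * 3600 * 1000;

function objectIdFromDate(date) {
  // seconds since the epoch, in hex, padded out to a full 24-character id
  const id = Math.floor(date.getTime() / 1000).toString(16) + "0000000000000000";
  return ObjectId(id);
}

// e.g. 2020-02-17T00:00:00Z is 1581897600 seconds, i.e. 0x5e49d780,
// so the boundary id is 5e49d7800000000000000000
console.log(objectIdFromDate(new Date(Date.now() - 7 * DAYS)));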

View file

@ -1,62 +1,84 @@
Heap = require "heap" /*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let ProjectIterator;
const Heap = require("heap");
module.exports = ProjectIterator = module.exports = (ProjectIterator =
class ProjectIterator (ProjectIterator = class ProjectIterator {
constructor: (packs, @before, @getPackByIdFn) -> constructor(packs, before, getPackByIdFn) {
byEndTs = (a,b) -> (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex) this.before = before;
@packs = packs.slice().sort byEndTs this.getPackByIdFn = getPackByIdFn;
@queue = new Heap(byEndTs) const byEndTs = (a,b) => (b.meta.end_ts - a.meta.end_ts) || (a.fromIndex - b.fromIndex);
this.packs = packs.slice().sort(byEndTs);
this.queue = new Heap(byEndTs);
}
next: (callback) -> next(callback) {
# what's up next // what's up next
#console.log ">>> top item", iterator.packs[0] //console.log ">>> top item", iterator.packs[0]
iterator = this const iterator = this;
before = @before const { before } = this;
queue = iterator.queue const { queue } = iterator;
opsToReturn = [] const opsToReturn = [];
nextPack = iterator.packs[0] let nextPack = iterator.packs[0];
lowWaterMark = nextPack?.meta.end_ts || 0 let lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
nextItem = queue.peek() let nextItem = queue.peek();
#console.log "queue empty?", queue.empty() //console.log "queue empty?", queue.empty()
#console.log "nextItem", nextItem //console.log "nextItem", nextItem
#console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts //console.log "nextItem.meta.end_ts", nextItem?.meta.end_ts
#console.log "lowWaterMark", lowWaterMark //console.log "lowWaterMark", lowWaterMark
while before? and nextPack?.meta.start_ts > before while ((before != null) && ((nextPack != null ? nextPack.meta.start_ts : undefined) > before)) {
# discard pack that is outside range // discard pack that is outside range
iterator.packs.shift() iterator.packs.shift();
nextPack = iterator.packs[0] nextPack = iterator.packs[0];
lowWaterMark = nextPack?.meta.end_ts || 0 lowWaterMark = (nextPack != null ? nextPack.meta.end_ts : undefined) || 0;
}
if (queue.empty() or nextItem?.meta.end_ts <= lowWaterMark) and nextPack? if ((queue.empty() || ((nextItem != null ? nextItem.meta.end_ts : undefined) <= lowWaterMark)) && (nextPack != null)) {
# retrieve the next pack and populate the queue // retrieve the next pack and populate the queue
return @getPackByIdFn nextPack.project_id, nextPack.doc_id, nextPack._id, (err, pack) -> return this.getPackByIdFn(nextPack.project_id, nextPack.doc_id, nextPack._id, function(err, pack) {
return callback(err) if err? if (err != null) { return callback(err); }
iterator.packs.shift() # have now retrieved this pack, remove it iterator.packs.shift(); // have now retrieved this pack, remove it
#console.log "got pack", pack //console.log "got pack", pack
for op in pack.pack when (not before? or op.meta.end_ts < before) for (let op of Array.from(pack.pack)) {
#console.log "adding op", op //console.log "adding op", op
op.doc_id = nextPack.doc_id if ((before == null) || (op.meta.end_ts < before)) {
op.project_id = nextPack.project_id op.doc_id = nextPack.doc_id;
queue.push op op.project_id = nextPack.project_id;
# now try again queue.push(op);
return iterator.next(callback) }
}
// now try again
return iterator.next(callback);
});
}
#console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark //console.log "nextItem", nextItem, "lowWaterMark", lowWaterMark
while nextItem? and (nextItem?.meta.end_ts > lowWaterMark) while ((nextItem != null) && ((nextItem != null ? nextItem.meta.end_ts : undefined) > lowWaterMark)) {
opsToReturn.push nextItem opsToReturn.push(nextItem);
queue.pop() queue.pop();
nextItem = queue.peek() nextItem = queue.peek();
}
#console.log "queue empty?", queue.empty() //console.log "queue empty?", queue.empty()
#console.log "nextPack", nextPack? //console.log "nextPack", nextPack?
if queue.empty() and not nextPack? # got everything if (queue.empty() && (nextPack == null)) { // got everything
iterator._done = true iterator._done = true;
}
callback(null, opsToReturn) return callback(null, opsToReturn);
}
done: () -> done() {
return @_done return this._done;
}
}));
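A hypothetical drain loop showing how ProjectIterator above is meant to be consumed: call next() repeatedly for batches of ops in reverse time order until done() reports that both the heap and the pack list are exhausted. The require path and the packs/fetchPack arguments are assumptions standing in for the pack index entries and the PackManager lookup used by the real callers, which share the (project_id, doc_id, pack_id, callback) signature seen above.

const ProjectIterator = require("./ProjectIterator");

function drainProjectOps(packs, before, fetchPack, callback) {
  const iterator = new ProjectIterator(packs, before, fetchPack);
  const allOps = [];
  const step = function() {
    if (iterator.done()) { return callback(null, allOps); }
    iterator.next(function(err, ops) {
      if (err != null) { return callback(err); }
      allOps.push(...ops); // each batch arrives newest-first
      return step();
    });
  };
  return step();
}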

View file

@ -1,80 +1,121 @@
Settings = require "settings-sharelatex" /*
redis = require("redis-sharelatex") * decaffeinate suggestions:
rclient = redis.createClient(Settings.redis.history) * DS101: Remove unnecessary use of Array.from
Keys = Settings.redis.history.key_schema * DS102: Remove unnecessary code created because of implicit returns
async = require "async" * DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RedisManager;
const Settings = require("settings-sharelatex");
const redis = require("redis-sharelatex");
const rclient = redis.createClient(Settings.redis.history);
const Keys = Settings.redis.history.key_schema;
const async = require("async");
module.exports = RedisManager = module.exports = (RedisManager = {
getOldestDocUpdates: (doc_id, batchSize, callback = (error, jsonUpdates) ->) -> getOldestDocUpdates(doc_id, batchSize, callback) {
key = Keys.uncompressedHistoryOps({doc_id}) if (callback == null) { callback = function(error, jsonUpdates) {}; }
rclient.lrange key, 0, batchSize - 1, callback const key = Keys.uncompressedHistoryOps({doc_id});
return rclient.lrange(key, 0, batchSize - 1, callback);
},
expandDocUpdates: (jsonUpdates, callback = (error, rawUpdates) ->) -> expandDocUpdates(jsonUpdates, callback) {
try let rawUpdates;
rawUpdates = ( JSON.parse(update) for update in jsonUpdates or [] ) if (callback == null) { callback = function(error, rawUpdates) {}; }
catch e try {
return callback(e) rawUpdates = ( Array.from(jsonUpdates || []).map((update) => JSON.parse(update)) );
callback null, rawUpdates } catch (e) {
return callback(e);
}
return callback(null, rawUpdates);
},
deleteAppliedDocUpdates: (project_id, doc_id, docUpdates, callback = (error) ->) -> deleteAppliedDocUpdates(project_id, doc_id, docUpdates, callback) {
multi = rclient.multi() if (callback == null) { callback = function(error) {}; }
# Delete all the updates which have been applied (exact match) const multi = rclient.multi();
for update in docUpdates or [] // Delete all the updates which have been applied (exact match)
multi.lrem Keys.uncompressedHistoryOps({doc_id}), 1, update for (let update of Array.from(docUpdates || [])) {
multi.exec (error, results) -> multi.lrem(Keys.uncompressedHistoryOps({doc_id}), 1, update);
return callback(error) if error? }
# It's ok to delete the doc_id from the set here. Even though the list return multi.exec(function(error, results) {
# of updates may not be empty, we will continue to process it until it is. if (error != null) { return callback(error); }
rclient.srem Keys.docsWithHistoryOps({project_id}), doc_id, (error) -> // It's ok to delete the doc_id from the set here. Even though the list
return callback(error) if error? // of updates may not be empty, we will continue to process it until it is.
callback null return rclient.srem(Keys.docsWithHistoryOps({project_id}), doc_id, function(error) {
if (error != null) { return callback(error); }
return callback(null);
});
});
},
getDocIdsWithHistoryOps: (project_id, callback = (error, doc_ids) ->) -> getDocIdsWithHistoryOps(project_id, callback) {
rclient.smembers Keys.docsWithHistoryOps({project_id}), callback if (callback == null) { callback = function(error, doc_ids) {}; }
return rclient.smembers(Keys.docsWithHistoryOps({project_id}), callback);
},
# iterate over keys asynchronously using redis scan (non-blocking) // iterate over keys asynchronously using redis scan (non-blocking)
# handle all the cluster nodes or single redis server // handle all the cluster nodes or single redis server
_getKeys: (pattern, callback) -> _getKeys(pattern, callback) {
nodes = rclient.nodes?('master') || [ rclient ]; const nodes = (typeof rclient.nodes === 'function' ? rclient.nodes('master') : undefined) || [ rclient ];
doKeyLookupForNode = (node, cb) -> const doKeyLookupForNode = (node, cb) => RedisManager._getKeysFromNode(node, pattern, cb);
RedisManager._getKeysFromNode node, pattern, cb return async.concatSeries(nodes, doKeyLookupForNode, callback);
async.concatSeries nodes, doKeyLookupForNode, callback },
_getKeysFromNode: (node, pattern, callback) -> _getKeysFromNode(node, pattern, callback) {
cursor = 0 # redis iterator let cursor = 0; // redis iterator
keySet = {} # use hash to avoid duplicate results const keySet = {}; // use hash to avoid duplicate results
# scan over all keys looking for pattern // scan over all keys looking for pattern
doIteration = (cb) -> var doIteration = cb =>
node.scan cursor, "MATCH", pattern, "COUNT", 1000, (error, reply) -> node.scan(cursor, "MATCH", pattern, "COUNT", 1000, function(error, reply) {
return callback(error) if error? let keys;
[cursor, keys] = reply if (error != null) { return callback(error); }
for key in keys [cursor, keys] = Array.from(reply);
keySet[key] = true for (let key of Array.from(keys)) {
if cursor == '0' # note redis returns string result not numeric keySet[key] = true;
return callback(null, Object.keys(keySet)) }
else if (cursor === '0') { // note redis returns string result not numeric
doIteration() return callback(null, Object.keys(keySet));
doIteration() } else {
return doIteration();
}
})
;
return doIteration();
},
# extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b // extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
# or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster) // or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
_extractIds: (keyList) -> _extractIds(keyList) {
ids = for key in keyList const ids = (() => {
m = key.match(/:\{?([0-9a-f]{24})\}?/) # extract object id const result = [];
m[1] for (let key of Array.from(keyList)) {
return ids const m = key.match(/:\{?([0-9a-f]{24})\}?/); // extract object id
result.push(m[1]);
}
return result;
})();
return ids;
},
getProjectIdsWithHistoryOps: (callback = (error, project_ids) ->) -> getProjectIdsWithHistoryOps(callback) {
RedisManager._getKeys Keys.docsWithHistoryOps({project_id:"*"}), (error, project_keys) -> if (callback == null) { callback = function(error, project_ids) {}; }
return callback(error) if error? return RedisManager._getKeys(Keys.docsWithHistoryOps({project_id:"*"}), function(error, project_keys) {
project_ids = RedisManager._extractIds project_keys if (error != null) { return callback(error); }
callback(error, project_ids) const project_ids = RedisManager._extractIds(project_keys);
return callback(error, project_ids);
});
},
getAllDocIdsWithHistoryOps: (callback = (error, doc_ids) ->) -> getAllDocIdsWithHistoryOps(callback) {
# return all the docids, to find dangling history entries after // return all the docids, to find dangling history entries after
# everything is flushed. // everything is flushed.
RedisManager._getKeys Keys.uncompressedHistoryOps({doc_id:"*"}), (error, doc_keys) -> if (callback == null) { callback = function(error, doc_ids) {}; }
return callback(error) if error? return RedisManager._getKeys(Keys.uncompressedHistoryOps({doc_id:"*"}), function(error, doc_keys) {
doc_ids = RedisManager._extractIds doc_keys if (error != null) { return callback(error); }
callback(error, doc_ids) const doc_ids = RedisManager._extractIds(doc_keys);
return callback(error, doc_ids);
});
}
});
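A standalone sketch of the id extraction performed by _extractIds above; the sample keys reuse the id from the comment and show that the regex accepts both the plain key form and the {hash-tag} form used by redis cluster key schemas.

const extractIds = keyList =>
  keyList.map(key => key.match(/:\{?([0-9a-f]{24})\}?/)[1]);

console.log(extractIds([
  "DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b",
  "DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b}"
]));
// -> [ '57fd0b1f53a8396d22b2c24b', '57fd0b1f53a8396d22b2c24b' ]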

View file

@ -1,12 +1,24 @@
DocumentUpdaterManager = require "./DocumentUpdaterManager" /*
DiffManager = require "./DiffManager" * decaffeinate suggestions:
logger = require "logger-sharelatex" * DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RestoreManager;
const DocumentUpdaterManager = require("./DocumentUpdaterManager");
const DiffManager = require("./DiffManager");
const logger = require("logger-sharelatex");
module.exports = RestoreManager = module.exports = (RestoreManager = {
restoreToBeforeVersion: (project_id, doc_id, version, user_id, callback = (error) ->) -> restoreToBeforeVersion(project_id, doc_id, version, user_id, callback) {
logger.log project_id: project_id, doc_id: doc_id, version: version, user_id: user_id, "restoring document" if (callback == null) { callback = function(error) {}; }
DiffManager.getDocumentBeforeVersion project_id, doc_id, version, (error, content) -> logger.log({project_id, doc_id, version, user_id}, "restoring document");
return callback(error) if error? return DiffManager.getDocumentBeforeVersion(project_id, doc_id, version, function(error, content) {
DocumentUpdaterManager.setDocument project_id, doc_id, content, user_id, (error) -> if (error != null) { return callback(error); }
return callback(error) if error? return DocumentUpdaterManager.setDocument(project_id, doc_id, content, user_id, function(error) {
callback() if (error != null) { return callback(error); }
return callback();
});
});
}
});

View file

@ -1,218 +1,278 @@
strInject = (s1, pos, s2) -> s1[...pos] + s2 + s1[pos..] /*
strRemove = (s1, pos, length) -> s1[...pos] + s1[(pos + length)..] * decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let oneMinute, twoMegabytes, UpdateCompressor;
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos);
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice((pos + length));
diff_match_patch = require("../lib/diff_match_patch").diff_match_patch const { diff_match_patch } = require("../lib/diff_match_patch");
dmp = new diff_match_patch() const dmp = new diff_match_patch();
module.exports = UpdateCompressor = module.exports = (UpdateCompressor = {
NOOP: "noop" NOOP: "noop",
# Updates come from the doc updater in format // Updates come from the doc updater in format
# { // {
# op: [ { ... op1 ... }, { ... op2 ... } ] // op: [ { ... op1 ... }, { ... op2 ... } ]
# meta: { ts: ..., user_id: ... } // meta: { ts: ..., user_id: ... }
# } // }
# but it's easier to work with one op per update, so convert these updates to // but it's easier to work with one op per update, so convert these updates to
# our compressed format // our compressed format
# [{ // [{
# op: op1 // op: op1
# meta: { start_ts: ... , end_ts: ..., user_id: ... } // meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }, { // }, {
# op: op2 // op: op2
# meta: { start_ts: ... , end_ts: ..., user_id: ... } // meta: { start_ts: ... , end_ts: ..., user_id: ... }
# }] // }]
convertToSingleOpUpdates: (updates) -> convertToSingleOpUpdates(updates) {
splitUpdates = [] const splitUpdates = [];
for update in updates for (let update of Array.from(updates)) {
# Reject any non-insert or delete ops, i.e. comments // Reject any non-insert or delete ops, i.e. comments
ops = update.op.filter (o) -> o.i? or o.d? const ops = update.op.filter(o => (o.i != null) || (o.d != null));
if ops.length == 0 if (ops.length === 0) {
splitUpdates.push splitUpdates.push({
op: UpdateCompressor.NOOP op: UpdateCompressor.NOOP,
meta: meta: {
start_ts: update.meta.start_ts or update.meta.ts start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts or update.meta.ts end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id user_id: update.meta.user_id
},
v: update.v v: update.v
else });
for op in ops } else {
splitUpdates.push for (let op of Array.from(ops)) {
op: op splitUpdates.push({
meta: op,
start_ts: update.meta.start_ts or update.meta.ts meta: {
end_ts: update.meta.end_ts or update.meta.ts start_ts: update.meta.start_ts || update.meta.ts,
end_ts: update.meta.end_ts || update.meta.ts,
user_id: update.meta.user_id user_id: update.meta.user_id
},
v: update.v v: update.v
return splitUpdates });
}
}
}
return splitUpdates;
},
concatUpdatesWithSameVersion: (updates) -> concatUpdatesWithSameVersion(updates) {
concattedUpdates = [] const concattedUpdates = [];
for update in updates for (let update of Array.from(updates)) {
lastUpdate = concattedUpdates[concattedUpdates.length - 1] const lastUpdate = concattedUpdates[concattedUpdates.length - 1];
if lastUpdate? and lastUpdate.v == update.v if ((lastUpdate != null) && (lastUpdate.v === update.v)) {
lastUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP if (update.op !== UpdateCompressor.NOOP) { lastUpdate.op.push(update.op); }
else } else {
nextUpdate = const nextUpdate = {
op: [] op: [],
meta: update.meta meta: update.meta,
v: update.v v: update.v
nextUpdate.op.push update.op unless update.op == UpdateCompressor.NOOP };
concattedUpdates.push nextUpdate if (update.op !== UpdateCompressor.NOOP) { nextUpdate.op.push(update.op); }
return concattedUpdates concattedUpdates.push(nextUpdate);
}
}
return concattedUpdates;
},
compressRawUpdates: (lastPreviousUpdate, rawUpdates) -> compressRawUpdates(lastPreviousUpdate, rawUpdates) {
if lastPreviousUpdate?.op?.length > 1 if (__guard__(lastPreviousUpdate != null ? lastPreviousUpdate.op : undefined, x => x.length) > 1) {
# if the last previous update was an array op, don't compress onto it. // if the last previous update was an array op, don't compress onto it.
# This avoids cases where array length changes but version number doesn't // This avoids cases where array length changes but version number doesn't
return [lastPreviousUpdate].concat UpdateCompressor.compressRawUpdates(null,rawUpdates) return [lastPreviousUpdate].concat(UpdateCompressor.compressRawUpdates(null,rawUpdates));
if lastPreviousUpdate? }
rawUpdates = [lastPreviousUpdate].concat(rawUpdates) if (lastPreviousUpdate != null) {
updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates) rawUpdates = [lastPreviousUpdate].concat(rawUpdates);
updates = UpdateCompressor.compressUpdates(updates) }
return UpdateCompressor.concatUpdatesWithSameVersion(updates) let updates = UpdateCompressor.convertToSingleOpUpdates(rawUpdates);
updates = UpdateCompressor.compressUpdates(updates);
return UpdateCompressor.concatUpdatesWithSameVersion(updates);
},
compressUpdates: (updates) -> compressUpdates(updates) {
return [] if updates.length == 0 if (updates.length === 0) { return []; }
compressedUpdates = [updates.shift()] let compressedUpdates = [updates.shift()];
for update in updates for (let update of Array.from(updates)) {
lastCompressedUpdate = compressedUpdates.pop() const lastCompressedUpdate = compressedUpdates.pop();
if lastCompressedUpdate? if (lastCompressedUpdate != null) {
compressedUpdates = compressedUpdates.concat UpdateCompressor._concatTwoUpdates lastCompressedUpdate, update compressedUpdates = compressedUpdates.concat(UpdateCompressor._concatTwoUpdates(lastCompressedUpdate, update));
else } else {
compressedUpdates.push update compressedUpdates.push(update);
}
return compressedUpdates
MAX_TIME_BETWEEN_UPDATES: oneMinute = 60 * 1000
MAX_UPDATE_SIZE: twoMegabytes = 2* 1024 * 1024
_concatTwoUpdates: (firstUpdate, secondUpdate) ->
firstUpdate =
op: firstUpdate.op
meta:
user_id: firstUpdate.meta.user_id or null
start_ts: firstUpdate.meta.start_ts or firstUpdate.meta.ts
end_ts: firstUpdate.meta.end_ts or firstUpdate.meta.ts
v: firstUpdate.v
secondUpdate =
op: secondUpdate.op
meta:
user_id: secondUpdate.meta.user_id or null
start_ts: secondUpdate.meta.start_ts or secondUpdate.meta.ts
end_ts: secondUpdate.meta.end_ts or secondUpdate.meta.ts
v: secondUpdate.v
if firstUpdate.meta.user_id != secondUpdate.meta.user_id
return [firstUpdate, secondUpdate]
if secondUpdate.meta.start_ts - firstUpdate.meta.end_ts > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES
return [firstUpdate, secondUpdate]
firstOp = firstUpdate.op
secondOp = secondUpdate.op
firstSize = firstOp.i?.length or firstOp.d?.length
secondSize = secondOp.i?.length or secondOp.d?.length
# Two inserts
if firstOp.i? and secondOp.i? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
v: secondUpdate.v
]
# Two deletes
else if firstOp.d? and secondOp.d? and secondOp.p <= firstOp.p <= (secondOp.p + secondOp.d.length) and firstSize + secondSize < UpdateCompressor.MAX_UPDATE_SIZE
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: secondOp.p
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
v: secondUpdate.v
]
# An insert and then a delete
else if firstOp.i? and secondOp.d? and firstOp.p <= secondOp.p <= (firstOp.p + firstOp.i.length)
offset = secondOp.p - firstOp.p
insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
# Only trim the insert when the delete is fully contained within it
if insertedText == secondOp.d
insert = strRemove(firstOp.i, offset, secondOp.d.length)
return [
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: insert
v: secondUpdate.v
]
else
# This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate]
# A delete then an insert at the same place, likely a copy-paste of a chunk of content
else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p
offset = firstOp.p
diff_ops = @diffAsShareJsOps(firstOp.d, secondOp.i)
if diff_ops.length == 0
return [{ # Noop
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op:
p: firstOp.p
i: ""
v: secondUpdate.v
}]
else
return diff_ops.map (op) ->
op.p += offset
return {
meta:
start_ts: firstUpdate.meta.start_ts
end_ts: secondUpdate.meta.end_ts
user_id: firstUpdate.meta.user_id
op: op
v: secondUpdate.v
} }
else return compressedUpdates;
return [firstUpdate, secondUpdate] },
ADDED: 1 MAX_TIME_BETWEEN_UPDATES: (oneMinute = 60 * 1000),
REMOVED: -1 MAX_UPDATE_SIZE: (twoMegabytes = 2* 1024 * 1024),
UNCHANGED: 0
diffAsShareJsOps: (before, after, callback = (error, ops) ->) ->
diffs = dmp.diff_main(before, after)
dmp.diff_cleanupSemantic(diffs)
ops = [] _concatTwoUpdates(firstUpdate, secondUpdate) {
position = 0 let offset;
for diff in diffs firstUpdate = {
type = diff[0] op: firstUpdate.op,
content = diff[1] meta: {
if type == @ADDED user_id: firstUpdate.meta.user_id || null,
ops.push start_ts: firstUpdate.meta.start_ts || firstUpdate.meta.ts,
i: content end_ts: firstUpdate.meta.end_ts || firstUpdate.meta.ts
},
v: firstUpdate.v
};
secondUpdate = {
op: secondUpdate.op,
meta: {
user_id: secondUpdate.meta.user_id || null,
start_ts: secondUpdate.meta.start_ts || secondUpdate.meta.ts,
end_ts: secondUpdate.meta.end_ts || secondUpdate.meta.ts
},
v: secondUpdate.v
};
if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
return [firstUpdate, secondUpdate];
}
if ((secondUpdate.meta.start_ts - firstUpdate.meta.end_ts) > UpdateCompressor.MAX_TIME_BETWEEN_UPDATES) {
return [firstUpdate, secondUpdate];
}
const firstOp = firstUpdate.op;
const secondOp = secondUpdate.op;
const firstSize = (firstOp.i != null ? firstOp.i.length : undefined) || (firstOp.d != null ? firstOp.d.length : undefined);
const secondSize = (secondOp.i != null ? secondOp.i.length : undefined) || (secondOp.d != null ? secondOp.d.length : undefined);
// Two inserts
if ((firstOp.i != null) && (secondOp.i != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i)
},
v: secondUpdate.v
}
];
// Two deletes
} else if ((firstOp.d != null) && (secondOp.d != null) && (secondOp.p <= firstOp.p && firstOp.p <= (secondOp.p + secondOp.d.length)) && ((firstSize + secondSize) < UpdateCompressor.MAX_UPDATE_SIZE)) {
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: secondOp.p,
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d)
},
v: secondUpdate.v
}
];
// An insert and then a delete
} else if ((firstOp.i != null) && (secondOp.d != null) && (firstOp.p <= secondOp.p && secondOp.p <= (firstOp.p + firstOp.i.length))) {
offset = secondOp.p - firstOp.p;
const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length);
// Only trim the insert when the delete is fully contained within it
if (insertedText === secondOp.d) {
const insert = strRemove(firstOp.i, offset, secondOp.d.length);
return [{
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: insert
},
v: secondUpdate.v
}
];
} else {
// This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate];
}
// A delete then an insert at the same place, likely a copy-paste of a chunk of content
} else if ((firstOp.d != null) && (secondOp.i != null) && (firstOp.p === secondOp.p)) {
offset = firstOp.p;
const diff_ops = this.diffAsShareJsOps(firstOp.d, secondOp.i);
if (diff_ops.length === 0) {
return [{ // Noop
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op: {
p: firstOp.p,
i: ""
},
v: secondUpdate.v
}];
} else {
return diff_ops.map(function(op) {
op.p += offset;
return {
meta: {
start_ts: firstUpdate.meta.start_ts,
end_ts: secondUpdate.meta.end_ts,
user_id: firstUpdate.meta.user_id
},
op,
v: secondUpdate.v
};});
}
} else {
return [firstUpdate, secondUpdate];
}
},
ADDED: 1,
REMOVED: -1,
UNCHANGED: 0,
diffAsShareJsOps(before, after, callback) {
if (callback == null) { callback = function(error, ops) {}; }
const diffs = dmp.diff_main(before, after);
dmp.diff_cleanupSemantic(diffs);
const ops = [];
let position = 0;
for (let diff of Array.from(diffs)) {
const type = diff[0];
const content = diff[1];
if (type === this.ADDED) {
ops.push({
i: content,
p: position p: position
position += content.length });
else if type == @REMOVED position += content.length;
ops.push } else if (type === this.REMOVED) {
d: content ops.push({
d: content,
p: position p: position
else if type == @UNCHANGED });
position += content.length } else if (type === this.UNCHANGED) {
else position += content.length;
throw "Unknown type" } else {
return ops throw "Unknown type";
}
}
return ops;
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
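A worked example (with invented data) of the compression pipeline above: two consecutive one-character inserts by the same user, half a second apart, first become single-op updates and are then merged into one insert spanning both characters, keeping the later version number.

const UpdateCompressor = require("./UpdateCompressor");

const rawUpdates = [
  { op: [{ i: "a", p: 5 }], meta: { ts: 1000, user_id: "user-1" }, v: 42 },
  { op: [{ i: "b", p: 6 }], meta: { ts: 1500, user_id: "user-1" }, v: 43 }
];

console.log(JSON.stringify(UpdateCompressor.compressRawUpdates(null, rawUpdates)));
// roughly: [{ op: [{ p: 5, i: "ab" }],
//             meta: { start_ts: 1000, end_ts: 1500, user_id: "user-1" },
//             v: 43 }]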

View file

@ -1,23 +1,44 @@
MongoManager = require "./MongoManager" /*
WebApiManager = require "./WebApiManager" * decaffeinate suggestions:
logger = require "logger-sharelatex" * DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let UpdateTrimmer;
const MongoManager = require("./MongoManager");
const WebApiManager = require("./WebApiManager");
const logger = require("logger-sharelatex");
module.exports = UpdateTrimmer = module.exports = (UpdateTrimmer = {
shouldTrimUpdates: (project_id, callback = (error, shouldTrim) ->) -> shouldTrimUpdates(project_id, callback) {
MongoManager.getProjectMetaData project_id, (error, metadata) -> if (callback == null) { callback = function(error, shouldTrim) {}; }
return callback(error) if error? return MongoManager.getProjectMetaData(project_id, function(error, metadata) {
if metadata?.preserveHistory if (error != null) { return callback(error); }
return callback null, false if (metadata != null ? metadata.preserveHistory : undefined) {
else return callback(null, false);
WebApiManager.getProjectDetails project_id, (error, details) -> } else {
return callback(error) if error? return WebApiManager.getProjectDetails(project_id, function(error, details) {
logger.log project_id: project_id, details: details, "got details" if (error != null) { return callback(error); }
if details?.features?.versioning logger.log({project_id, details}, "got details");
MongoManager.setProjectMetaData project_id, preserveHistory: true, (error) -> if (__guard__(details != null ? details.features : undefined, x => x.versioning)) {
return callback(error) if error? return MongoManager.setProjectMetaData(project_id, {preserveHistory: true}, function(error) {
MongoManager.upgradeHistory project_id, (error) -> if (error != null) { return callback(error); }
return callback(error) if error? return MongoManager.upgradeHistory(project_id, function(error) {
callback null, false if (error != null) { return callback(error); }
else return callback(null, false);
callback null, true });
});
} else {
return callback(null, true);
}
});
}
});
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
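A condensed, side-effect-free sketch of the decision shouldTrimUpdates above encodes (true means the project's history is temporary and will be trimmed); the helper name and arguments are illustrative only.

function shouldTrim(metadata, projectDetails) {
  // already upgraded: history is preserved permanently
  if ((metadata != null) && metadata.preserveHistory) { return false; }
  const hasVersioning = (projectDetails != null) &&
    (projectDetails.features != null) && projectDetails.features.versioning;
  // projects with the versioning feature keep (and upgrade) their history;
  // everything else gets temporary, expiring history
  return !hasVersioning;
}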

View file

@ -1,344 +1,494 @@
MongoManager = require "./MongoManager" /*
PackManager = require "./PackManager" * decaffeinate suggestions:
RedisManager = require "./RedisManager" * DS101: Remove unnecessary use of Array.from
UpdateCompressor = require "./UpdateCompressor" * DS102: Remove unnecessary code created because of implicit returns
LockManager = require "./LockManager" * DS103: Rewrite code to no longer use __guard__
WebApiManager = require "./WebApiManager" * DS205: Consider reworking code to avoid use of IIFEs
UpdateTrimmer = require "./UpdateTrimmer" * DS207: Consider shorter variations of null checks
logger = require "logger-sharelatex" * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
async = require "async" */
_ = require "underscore" let fiveMinutes, UpdatesManager;
Settings = require "settings-sharelatex" const MongoManager = require("./MongoManager");
keys = Settings.redis.lock.key_schema const PackManager = require("./PackManager");
const RedisManager = require("./RedisManager");
const UpdateCompressor = require("./UpdateCompressor");
const LockManager = require("./LockManager");
const WebApiManager = require("./WebApiManager");
const UpdateTrimmer = require("./UpdateTrimmer");
const logger = require("logger-sharelatex");
const async = require("async");
const _ = require("underscore");
const Settings = require("settings-sharelatex");
const keys = Settings.redis.lock.key_schema;
module.exports = UpdatesManager = module.exports = (UpdatesManager = {
compressAndSaveRawUpdates: (project_id, doc_id, rawUpdates, temporary, callback = (error) ->) -> compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, callback) {
length = rawUpdates.length let i;
if length == 0 if (callback == null) { callback = function(error) {}; }
return callback() const { length } = rawUpdates;
if (length === 0) {
return callback();
}
# check that ops are in the correct order // check that ops are in the correct order
for op, i in rawUpdates when i > 0 for (i = 0; i < rawUpdates.length; i++) {
thisVersion = op?.v const op = rawUpdates[i];
prevVersion = rawUpdates[i-1]?.v if (i > 0) {
if not (prevVersion < thisVersion) const thisVersion = op != null ? op.v : undefined;
logger.error project_id: project_id, doc_id: doc_id, rawUpdates:rawUpdates, temporary: temporary, thisVersion:thisVersion, prevVersion:prevVersion, "op versions out of order" const prevVersion = __guard__(rawUpdates[i-1], x => x.v);
if (!(prevVersion < thisVersion)) {
logger.error({project_id, doc_id, rawUpdates, temporary, thisVersion, prevVersion}, "op versions out of order");
}
}
}
# FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it // FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it
# CORRECTION: we do use it to log the time in case of error // CORRECTION: we do use it to log the time in case of error
MongoManager.peekLastCompressedUpdate doc_id, (error, lastCompressedUpdate, lastVersion) -> return MongoManager.peekLastCompressedUpdate(doc_id, function(error, lastCompressedUpdate, lastVersion) {
# lastCompressedUpdate is the most recent update in Mongo, and // lastCompressedUpdate is the most recent update in Mongo, and
# lastVersion is its sharejs version number. // lastVersion is its sharejs version number.
# //
# The peekLastCompressedUpdate method may pass the update back // The peekLastCompressedUpdate method may pass the update back
# as 'null' (for example if the previous compressed update has // as 'null' (for example if the previous compressed update has
# been archived). In this case it can still pass back the // been archived). In this case it can still pass back the
# lastVersion from the update to allow us to check consistency. // lastVersion from the update to allow us to check consistency.
return callback(error) if error? let op;
if (error != null) { return callback(error); }
# Ensure that raw updates start where lastVersion left off // Ensure that raw updates start where lastVersion left off
if lastVersion? if (lastVersion != null) {
discardedUpdates = [] const discardedUpdates = [];
rawUpdates = rawUpdates.slice(0) rawUpdates = rawUpdates.slice(0);
while rawUpdates[0]? and rawUpdates[0].v <= lastVersion while ((rawUpdates[0] != null) && (rawUpdates[0].v <= lastVersion)) {
discardedUpdates.push rawUpdates.shift() discardedUpdates.push(rawUpdates.shift());
if discardedUpdates.length }
logger.error project_id: project_id, doc_id: doc_id, discardedUpdates: discardedUpdates, temporary: temporary, lastVersion: lastVersion, "discarded updates already present" if (discardedUpdates.length) {
logger.error({project_id, doc_id, discardedUpdates, temporary, lastVersion}, "discarded updates already present");
}
if rawUpdates[0]? and rawUpdates[0].v != lastVersion + 1 if ((rawUpdates[0] != null) && (rawUpdates[0].v !== (lastVersion + 1))) {
ts = lastCompressedUpdate?.meta?.end_ts const ts = __guard__(lastCompressedUpdate != null ? lastCompressedUpdate.meta : undefined, x1 => x1.end_ts);
last_timestamp = if ts? then new Date(ts) else 'unknown time' const last_timestamp = (ts != null) ? new Date(ts) : 'unknown time';
error = new Error("Tried to apply raw op at version #{rawUpdates[0].v} to last compressed update with version #{lastVersion} from #{last_timestamp}") error = new Error(`Tried to apply raw op at version ${rawUpdates[0].v} to last compressed update with version ${lastVersion} from ${last_timestamp}`);
logger.error err: error, doc_id: doc_id, project_id: project_id, prev_end_ts: ts, temporary: temporary, lastCompressedUpdate: lastCompressedUpdate, "inconsistent doc versions" logger.error({err: error, doc_id, project_id, prev_end_ts: ts, temporary, lastCompressedUpdate}, "inconsistent doc versions");
if Settings.trackchanges?.continueOnError and rawUpdates[0].v > lastVersion + 1 if ((Settings.trackchanges != null ? Settings.trackchanges.continueOnError : undefined) && (rawUpdates[0].v > (lastVersion + 1))) {
# we have lost some ops - continue to write into the database, we can't recover at this point // we have lost some ops - continue to write into the database, we can't recover at this point
lastCompressedUpdate = null lastCompressedUpdate = null;
else } else {
return callback error return callback(error);
}
}
}
if rawUpdates.length == 0 if (rawUpdates.length === 0) {
return callback() return callback();
}
# some old large ops in redis need to be rejected because they predate // some old large ops in redis need to be rejected because they predate
# the size limit that now prevents them going through the system // the size limit that now prevents them going through the system
REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024 const REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024;
for rawUpdate in rawUpdates for (var rawUpdate of Array.from(rawUpdates)) {
opSizes = ((op.i?.length || op.d?.length) for op in rawUpdate?.op or []) const opSizes = ((() => {
size = _.max opSizes const result = [];
if size > REJECT_LARGE_OP_SIZE for (op of Array.from((rawUpdate != null ? rawUpdate.op : undefined) || [])) { result.push(((op.i != null ? op.i.length : undefined) || (op.d != null ? op.d.length : undefined)));
error = new Error("dropped op exceeding maximum allowed size of #{REJECT_LARGE_OP_SIZE}") }
logger.error err: error, doc_id: doc_id, project_id: project_id, size: size, rawUpdate: rawUpdate, "dropped op - too big" return result;
rawUpdate.op = [] })());
const size = _.max(opSizes);
if (size > REJECT_LARGE_OP_SIZE) {
error = new Error(`dropped op exceeding maximum allowed size of ${REJECT_LARGE_OP_SIZE}`);
logger.error({err: error, doc_id, project_id, size, rawUpdate}, "dropped op - too big");
rawUpdate.op = [];
}
}
compressedUpdates = UpdateCompressor.compressRawUpdates null, rawUpdates const compressedUpdates = UpdateCompressor.compressRawUpdates(null, rawUpdates);
PackManager.insertCompressedUpdates project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, (error, result) -> return PackManager.insertCompressedUpdates(project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, function(error, result) {
return callback(error) if error? if (error != null) { return callback(error); }
logger.log {project_id, doc_id, orig_v: lastCompressedUpdate?.v, new_v: result.v}, "inserted updates into pack" if result? if (result != null) { logger.log({project_id, doc_id, orig_v: (lastCompressedUpdate != null ? lastCompressedUpdate.v : undefined), new_v: result.v}, "inserted updates into pack"); }
callback() return callback();
});
});
},
# Check whether the updates are temporary (per-project property) // Check whether the updates are temporary (per-project property)
_prepareProjectForUpdates: (project_id, callback = (error, temporary) ->) -> _prepareProjectForUpdates(project_id, callback) {
UpdateTrimmer.shouldTrimUpdates project_id, (error, temporary) -> if (callback == null) { callback = function(error, temporary) {}; }
return callback(error) if error? return UpdateTrimmer.shouldTrimUpdates(project_id, function(error, temporary) {
callback(null, temporary) if (error != null) { return callback(error); }
return callback(null, temporary);
});
},
# Check for project id on document history (per-document property) // Check for project id on document history (per-document property)
_prepareDocForUpdates: (project_id, doc_id, callback = (error) ->) -> _prepareDocForUpdates(project_id, doc_id, callback) {
MongoManager.backportProjectId project_id, doc_id, (error) -> if (callback == null) { callback = function(error) {}; }
return callback(error) if error? return MongoManager.backportProjectId(project_id, doc_id, function(error) {
callback(null) if (error != null) { return callback(error); }
return callback(null);
});
},
# Apply updates for specific project/doc after preparing at project and doc level // Apply updates for specific project/doc after preparing at project and doc level
REDIS_READ_BATCH_SIZE: 100 REDIS_READ_BATCH_SIZE: 100,
processUncompressedUpdates: (project_id, doc_id, temporary, callback = (error) ->) -> processUncompressedUpdates(project_id, doc_id, temporary, callback) {
# get the updates as strings from redis (so we can delete them after they are applied) // get the updates as strings from redis (so we can delete them after they are applied)
RedisManager.getOldestDocUpdates doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, (error, docUpdates) -> if (callback == null) { callback = function(error) {}; }
return callback(error) if error? return RedisManager.getOldestDocUpdates(doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, function(error, docUpdates) {
length = docUpdates.length if (error != null) { return callback(error); }
# parse the redis strings into ShareJs updates const { length } = docUpdates;
RedisManager.expandDocUpdates docUpdates, (error, rawUpdates) -> // parse the redis strings into ShareJs updates
if error? return RedisManager.expandDocUpdates(docUpdates, function(error, rawUpdates) {
logger.err project_id: project_id, doc_id: doc_id, docUpdates: docUpdates, "failed to parse docUpdates" if (error != null) {
return callback(error) logger.err({project_id, doc_id, docUpdates}, "failed to parse docUpdates");
logger.log project_id: project_id, doc_id: doc_id, rawUpdates: rawUpdates, "retrieved raw updates from redis" return callback(error);
UpdatesManager.compressAndSaveRawUpdates project_id, doc_id, rawUpdates, temporary, (error) -> }
return callback(error) if error? logger.log({project_id, doc_id, rawUpdates}, "retrieved raw updates from redis");
logger.log project_id: project_id, doc_id: doc_id, "compressed and saved doc updates" return UpdatesManager.compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, function(error) {
# delete the applied updates from redis if (error != null) { return callback(error); }
RedisManager.deleteAppliedDocUpdates project_id, doc_id, docUpdates, (error) -> logger.log({project_id, doc_id}, "compressed and saved doc updates");
return callback(error) if error? // delete the applied updates from redis
if length == UpdatesManager.REDIS_READ_BATCH_SIZE return RedisManager.deleteAppliedDocUpdates(project_id, doc_id, docUpdates, function(error) {
# There might be more updates if (error != null) { return callback(error); }
logger.log project_id: project_id, doc_id: doc_id, "continuing processing updates" if (length === UpdatesManager.REDIS_READ_BATCH_SIZE) {
setTimeout () -> // There might be more updates
UpdatesManager.processUncompressedUpdates project_id, doc_id, temporary, callback logger.log({project_id, doc_id}, "continuing processing updates");
, 0 return setTimeout(() => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, callback)
else , 0);
logger.log project_id: project_id, doc_id: doc_id, "all raw updates processed" } else {
callback() logger.log({project_id, doc_id}, "all raw updates processed");
return callback();
}
});
});
});
});
},
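// A minimal sketch of the batch-and-reschedule pattern used above, assuming
// hypothetical fetchBatch/processBatch helpers in place of the Redis and
// compression calls: read at most one batch per pass, and reschedule when a
// full batch comes back because more updates may still be queued.
const BATCH_SIZE = 100;

function drainQueue(fetchBatch, processBatch, callback) {
  fetchBatch(BATCH_SIZE, function(error, items) {
    if (error != null) { return callback(error); }
    return processBatch(items, function(error) {
      if (error != null) { return callback(error); }
      if (items.length === BATCH_SIZE) {
        // a full batch means more work may remain; yield to the event loop first
        return setTimeout(() => drainQueue(fetchBatch, processBatch, callback), 0);
      }
      return callback(); // queue drained
    });
  });
}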
# Process updates for a doc when we flush it individually // Process updates for a doc when we flush it individually
processUncompressedUpdatesWithLock: (project_id, doc_id, callback = (error) ->) -> processUncompressedUpdatesWithLock(project_id, doc_id, callback) {
UpdatesManager._prepareProjectForUpdates project_id, (error, temporary) -> if (callback == null) { callback = function(error) {}; }
return callback(error) if error? return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
UpdatesManager._processUncompressedUpdatesForDocWithLock project_id, doc_id, temporary, callback if (error != null) { return callback(error); }
return UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback);
});
},
# Process updates for a doc when the whole project is flushed (internal method) // Process updates for a doc when the whole project is flushed (internal method)
_processUncompressedUpdatesForDocWithLock: (project_id, doc_id, temporary, callback = (error) ->) -> _processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback) {
UpdatesManager._prepareDocForUpdates project_id, doc_id, (error) -> if (callback == null) { callback = function(error) {}; }
return callback(error) if error? return UpdatesManager._prepareDocForUpdates(project_id, doc_id, function(error) {
LockManager.runWithLock( if (error != null) { return callback(error); }
return LockManager.runWithLock(
keys.historyLock({doc_id}), keys.historyLock({doc_id}),
(releaseLock) -> releaseLock => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, releaseLock),
UpdatesManager.processUncompressedUpdates project_id, doc_id, temporary, releaseLock
callback callback
);
});
},
// Process all updates for a project, only check project-level information once
processUncompressedUpdatesForProject(project_id, callback) {
if (callback == null) { callback = function(error) {}; }
return RedisManager.getDocIdsWithHistoryOps(project_id, function(error, doc_ids) {
if (error != null) { return callback(error); }
return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
const jobs = [];
for (let doc_id of Array.from(doc_ids)) {
(doc_id =>
jobs.push(cb => UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, cb))
)(doc_id);
}
return async.parallelLimit(jobs, 5, callback);
});
});
},
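// The per-doc fan-out above is a standard async.parallelLimit pattern; a compact
// sketch assuming a hypothetical flushOne(doc_id, cb) in place of
// _processUncompressedUpdatesForDocWithLock:
const async = require("async");

function flushDocsWithCap(doc_ids, flushOne, callback) {
  const jobs = doc_ids.map(doc_id => cb => flushOne(doc_id, cb));
  return async.parallelLimit(jobs, 5, callback); // at most 5 docs in flight at once
}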
// flush all outstanding changes
flushAll(limit, callback) {
if (callback == null) { callback = function(error, result) {}; }
return RedisManager.getProjectIdsWithHistoryOps(function(error, project_ids) {
let project_id;
if (error != null) { return callback(error); }
logger.log({count: (project_ids != null ? project_ids.length : undefined), project_ids}, "found projects");
const jobs = [];
project_ids = _.shuffle(project_ids); // randomise to avoid hitting same projects each time
const selectedProjects = limit < 0 ? project_ids : project_ids.slice(0, limit);
for (project_id of Array.from(selectedProjects)) {
(project_id =>
jobs.push(cb =>
UpdatesManager.processUncompressedUpdatesForProject(project_id, err => cb(null, {failed: (err != null), project_id}))
)
)(project_id);
}
return async.series(jobs, function(error, result) {
let x;
if (error != null) { return callback(error); }
const failedProjects = ((() => {
const result1 = [];
for (x of Array.from(result)) { if (x.failed) {
result1.push(x.project_id);
}
}
return result1;
})());
const succeededProjects = ((() => {
const result2 = [];
for (x of Array.from(result)) { if (!x.failed) {
result2.push(x.project_id);
}
}
return result2;
})());
return callback(null, {failed: failedProjects, succeeded: succeededProjects, all: project_ids});
});
});
},
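// Illustrative call of flushAll as defined above (the limit is arbitrary; a
// negative limit selects every project with pending history ops):
UpdatesManager.flushAll(100, function(error, result) {
  if (error != null) { throw error; }
  // result partitions the selected projects into successes and failures
  console.log("flushed:", result.succeeded.length, "failed:", result.failed.length,
    "with pending ops:", result.all.length);
});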
# Process all updates for a project, only check project-level information once getDanglingUpdates(callback) {
processUncompressedUpdatesForProject: (project_id, callback = (error) ->) -> if (callback == null) { callback = function(error, doc_ids) {}; }
RedisManager.getDocIdsWithHistoryOps project_id, (error, doc_ids) -> return RedisManager.getAllDocIdsWithHistoryOps(function(error, all_doc_ids) {
return callback(error) if error? if (error != null) { return callback(error); }
UpdatesManager._prepareProjectForUpdates project_id, (error, temporary) -> return RedisManager.getProjectIdsWithHistoryOps(function(error, all_project_ids) {
jobs = [] if (error != null) { return callback(error); }
for doc_id in doc_ids // function to get doc_ids for each project
do (doc_id) -> const task = cb => async.concatSeries(all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb);
jobs.push (cb) -> // find the dangling doc ids
UpdatesManager._processUncompressedUpdatesForDocWithLock project_id, doc_id, temporary, cb return task(function(error, project_doc_ids) {
async.parallelLimit jobs, 5, callback const dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids);
logger.log({all_doc_ids, all_project_ids, project_doc_ids, dangling_doc_ids}, "checking for dangling doc ids");
return callback(null, dangling_doc_ids);
});
});
});
},
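// The dangling check above reduces to a set difference over doc ids
// (underscore/lodash-style _.difference), e.g.:
const _ = require("underscore");
_.difference(["doc-a", "doc-b", "doc-c"], ["doc-b", "doc-c"]); // => ["doc-a"]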
# flush all outstanding changes getDocUpdates(project_id, doc_id, options, callback) {
flushAll: (limit, callback = (error, result) ->) -> if (options == null) { options = {}; }
RedisManager.getProjectIdsWithHistoryOps (error, project_ids) -> if (callback == null) { callback = function(error, updates) {}; }
return callback(error) if error? return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
logger.log {count: project_ids?.length, project_ids: project_ids}, "found projects" if (error != null) { return callback(error); }
jobs = [] //console.log "options", options
project_ids = _.shuffle project_ids # randomise to avoid hitting same projects each time return PackManager.getOpsByVersionRange(project_id, doc_id, options.from, options.to, function(error, updates) {
selectedProjects = if limit < 0 then project_ids else project_ids[0...limit] if (error != null) { return callback(error); }
for project_id in selectedProjects return callback(null, updates);
do (project_id) -> });
jobs.push (cb) -> });
UpdatesManager.processUncompressedUpdatesForProject project_id, (err) -> },
return cb(null, {failed: err?, project_id: project_id})
async.series jobs, (error, result) ->
return callback(error) if error?
failedProjects = (x.project_id for x in result when x.failed)
succeededProjects = (x.project_id for x in result when not x.failed)
callback(null, {failed: failedProjects, succeeded: succeededProjects, all: project_ids})
getDanglingUpdates: (callback = (error, doc_ids) ->) -> getDocUpdatesWithUserInfo(project_id, doc_id, options, callback) {
RedisManager.getAllDocIdsWithHistoryOps (error, all_doc_ids) -> if (options == null) { options = {}; }
return callback(error) if error? if (callback == null) { callback = function(error, updates) {}; }
RedisManager.getProjectIdsWithHistoryOps (error, all_project_ids) -> return UpdatesManager.getDocUpdates(project_id, doc_id, options, function(error, updates) {
return callback(error) if error? if (error != null) { return callback(error); }
# function to get doc_ids for each project return UpdatesManager.fillUserInfo(updates, function(error, updates) {
task = (cb) -> async.concatSeries all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb if (error != null) { return callback(error); }
# find the dangling doc ids return callback(null, updates);
task (error, project_doc_ids) -> });
dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids) });
logger.log {all_doc_ids: all_doc_ids, all_project_ids: all_project_ids, project_doc_ids: project_doc_ids, dangling_doc_ids: dangling_doc_ids}, "checking for dangling doc ids" },
callback(null, dangling_doc_ids)
getDocUpdates: (project_id, doc_id, options = {}, callback = (error, updates) ->) -> getSummarizedProjectUpdates(project_id, options, callback) {
UpdatesManager.processUncompressedUpdatesWithLock project_id, doc_id, (error) -> if (options == null) { options = {}; }
return callback(error) if error? if (callback == null) { callback = function(error, updates) {}; }
#console.log "options", options if (!options.min_count) { options.min_count = 25; }
PackManager.getOpsByVersionRange project_id, doc_id, options.from, options.to, (error, updates) -> let summarizedUpdates = [];
return callback(error) if error? const { before } = options;
callback null, updates let nextBeforeTimestamp = null;
return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
if (error != null) { return callback(error); }
return PackManager.makeProjectIterator(project_id, before, function(err, iterator) {
if (err != null) { return callback(err); }
// repeatedly get updates and pass them through the summariser to get a final output with user info
return async.whilst(() =>
//console.log "checking iterator.done", iterator.done()
(summarizedUpdates.length < options.min_count) && !iterator.done()
getDocUpdatesWithUserInfo: (project_id, doc_id, options = {}, callback = (error, updates) ->) -> , cb =>
UpdatesManager.getDocUpdates project_id, doc_id, options, (error, updates) -> iterator.next(function(err, partialUpdates) {
return callback(error) if error? if (err != null) { return callback(err); }
UpdatesManager.fillUserInfo updates, (error, updates) -> //logger.log {partialUpdates}, 'got partialUpdates'
return callback(error) if error? if (partialUpdates.length === 0) { return cb(); } //# FIXME should try to avoid this happening
callback null, updates nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts;
// add the updates to the summary list
summarizedUpdates = UpdatesManager._summarizeUpdates(partialUpdates, summarizedUpdates);
return cb();
})
getSummarizedProjectUpdates: (project_id, options = {}, callback = (error, updates) ->) -> , () =>
options.min_count ||= 25 // finally done all updates
summarizedUpdates = [] //console.log 'summarized Updates', summarizedUpdates
before = options.before UpdatesManager.fillSummarizedUserInfo(summarizedUpdates, function(err, results) {
nextBeforeTimestamp = null if (err != null) { return callback(err); }
UpdatesManager.processUncompressedUpdatesForProject project_id, (error) -> return callback(null, results, !iterator.done() ? nextBeforeTimestamp : undefined);
return callback(error) if error? })
PackManager.makeProjectIterator project_id, before, (err, iterator) -> );
return callback(err) if err? });
# repeatedly get updates and pass them through the summariser to get a final output with user info });
async.whilst () -> },
#console.log "checking iterator.done", iterator.done()
return summarizedUpdates.length < options.min_count and not iterator.done()
, (cb) ->
iterator.next (err, partialUpdates) ->
return callback(err) if err?
#logger.log {partialUpdates}, 'got partialUpdates'
return cb() if partialUpdates.length is 0 ## FIXME should try to avoid this happening
nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts
# add the updates to the summary list
summarizedUpdates = UpdatesManager._summarizeUpdates partialUpdates, summarizedUpdates
cb()
, () ->
# finally done all updates
#console.log 'summarized Updates', summarizedUpdates
UpdatesManager.fillSummarizedUserInfo summarizedUpdates, (err, results) ->
return callback(err) if err?
callback null, results, if not iterator.done() then nextBeforeTimestamp else undefined
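// The loop above is an async.whilst page-pull: keep requesting pages until enough
// summaries have accumulated or the iterator is exhausted. A stripped-down sketch
// under the same (synchronous-test) async.whilst signature, with a generic
// iterator standing in for PackManager's project iterator:
const async = require("async");

function collectPages(iterator, minCount, callback) {
  let collected = [];
  return async.whilst(
    () => collected.length < minCount && !iterator.done(),
    cb => iterator.next(function(err, page) {
      if (err != null) { return cb(err); }
      collected = collected.concat(page);
      return cb();
    }),
    err => callback(err, collected)
  );
}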
fetchUserInfo: (users, callback = (error, fetchedUserInfo) ->) -> fetchUserInfo(users, callback) {
jobs = [] if (callback == null) { callback = function(error, fetchedUserInfo) {}; }
fetchedUserInfo = {} const jobs = [];
for user_id of users const fetchedUserInfo = {};
do (user_id) -> for (let user_id in users) {
jobs.push (callback) -> (user_id =>
WebApiManager.getUserInfo user_id, (error, userInfo) -> jobs.push(callback =>
return callback(error) if error? WebApiManager.getUserInfo(user_id, function(error, userInfo) {
fetchedUserInfo[user_id] = userInfo if (error != null) { return callback(error); }
callback() fetchedUserInfo[user_id] = userInfo;
return callback();
})
)
)(user_id);
}
async.series jobs, (err) -> return async.series(jobs, function(err) {
return callback(err) if err? if (err != null) { return callback(err); }
callback(null, fetchedUserInfo) return callback(null, fetchedUserInfo);
});
},
fillUserInfo: (updates, callback = (error, updates) ->) -> fillUserInfo(updates, callback) {
users = {} let update, user_id;
for update in updates if (callback == null) { callback = function(error, updates) {}; }
user_id = update.meta.user_id const users = {};
if UpdatesManager._validUserId(user_id) for (update of Array.from(updates)) {
users[user_id] = true ({ user_id } = update.meta);
if (UpdatesManager._validUserId(user_id)) {
users[user_id] = true;
}
}
UpdatesManager.fetchUserInfo users, (error, fetchedUserInfo) -> return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
return callback(error) if error? if (error != null) { return callback(error); }
for update in updates for (update of Array.from(updates)) {
user_id = update.meta.user_id ({ user_id } = update.meta);
delete update.meta.user_id delete update.meta.user_id;
if UpdatesManager._validUserId(user_id) if (UpdatesManager._validUserId(user_id)) {
update.meta.user = fetchedUserInfo[user_id] update.meta.user = fetchedUserInfo[user_id];
callback null, updates }
}
return callback(null, updates);
});
},
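// Net effect of fillUserInfo on each update's metadata (values illustrative):
// before: update.meta = { user_id: "507f1f77bcf86cd799439011", start_ts: ..., end_ts: ... }
// after:  update.meta = { user: { id, email, first_name, last_name }, start_ts: ..., end_ts: ... }
// user_id is always removed; ids that are not 24-char hex (e.g. anonymous edits) get no user entry.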
fillSummarizedUserInfo: (updates, callback = (error, updates) ->) -> fillSummarizedUserInfo(updates, callback) {
users = {} let update, user_id, user_ids;
for update in updates if (callback == null) { callback = function(error, updates) {}; }
user_ids = update.meta.user_ids or [] const users = {};
for user_id in user_ids for (update of Array.from(updates)) {
if UpdatesManager._validUserId(user_id) user_ids = update.meta.user_ids || [];
users[user_id] = true for (user_id of Array.from(user_ids)) {
if (UpdatesManager._validUserId(user_id)) {
users[user_id] = true;
}
}
}
UpdatesManager.fetchUserInfo users, (error, fetchedUserInfo) -> return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
return callback(error) if error? if (error != null) { return callback(error); }
for update in updates for (update of Array.from(updates)) {
user_ids = update.meta.user_ids or [] user_ids = update.meta.user_ids || [];
update.meta.users = [] update.meta.users = [];
delete update.meta.user_ids delete update.meta.user_ids;
for user_id in user_ids for (user_id of Array.from(user_ids)) {
if UpdatesManager._validUserId(user_id) if (UpdatesManager._validUserId(user_id)) {
update.meta.users.push fetchedUserInfo[user_id] update.meta.users.push(fetchedUserInfo[user_id]);
else } else {
update.meta.users.push null update.meta.users.push(null);
callback null, updates }
}
}
return callback(null, updates);
});
},
_validUserId: (user_id) -> _validUserId(user_id) {
if !user_id? if ((user_id == null)) {
return false return false;
else } else {
return !!user_id.match(/^[a-f0-9]{24}$/) return !!user_id.match(/^[a-f0-9]{24}$/);
}
},
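// The guard above only accepts 24-character hex strings (the Mongo ObjectId format):
// UpdatesManager._validUserId("507f1f77bcf86cd799439011") // => true
// UpdatesManager._validUserId("anonymous-user")           // => false
// UpdatesManager._validUserId(null)                       // => false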
TIME_BETWEEN_DISTINCT_UPDATES: fiveMinutes = 5 * 60 * 1000 TIME_BETWEEN_DISTINCT_UPDATES: (fiveMinutes = 5 * 60 * 1000),
SPLIT_ON_DELETE_SIZE: 16 # characters SPLIT_ON_DELETE_SIZE: 16, // characters
_summarizeUpdates: (updates, existingSummarizedUpdates = []) -> _summarizeUpdates(updates, existingSummarizedUpdates) {
summarizedUpdates = existingSummarizedUpdates.slice() if (existingSummarizedUpdates == null) { existingSummarizedUpdates = []; }
previousUpdateWasBigDelete = false const summarizedUpdates = existingSummarizedUpdates.slice();
for update in updates let previousUpdateWasBigDelete = false;
earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1] for (let update of Array.from(updates)) {
shouldConcat = false var doc_id;
const earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1];
let shouldConcat = false;
# If a user inserts some text, then deletes a big chunk including that text, // If a user inserts some text, then deletes a big chunk including that text,
# the update we show might concat the insert and delete, and there will be no sign // the update we show might concat the insert and delete, and there will be no sign
# of that insert having happened, or any way to restore to it (restoring after a big delete is common). // of that insert having happened, or any way to restore to it (restoring after a big delete is common).
# So, we split the summary on 'big' deletes. However, we're stepping backwards in time with // So, we split the summary on 'big' deletes. However, we're stepping backwards in time with
# most recent changes considered first, so if this update is a big delete, we want to start // most recent changes considered first, so if this update is a big delete, we want to start
# a new summarized update next time, hence we monitor the previous update. // a new summarized update next time, hence we monitor the previous update.
if previousUpdateWasBigDelete if (previousUpdateWasBigDelete) {
shouldConcat = false shouldConcat = false;
else if earliestUpdate and earliestUpdate.meta.end_ts - update.meta.start_ts < @TIME_BETWEEN_DISTINCT_UPDATES } else if (earliestUpdate && ((earliestUpdate.meta.end_ts - update.meta.start_ts) < this.TIME_BETWEEN_DISTINCT_UPDATES)) {
# We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before // We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before
# the end of the current summarized block, so no block spans more than 5 minutes. // the end of the current summarized block, so no block spans more than 5 minutes.
shouldConcat = true shouldConcat = true;
}
isBigDelete = false let isBigDelete = false;
for op in update.op or [] for (let op of Array.from(update.op || [])) {
if op.d? and op.d.length > @SPLIT_ON_DELETE_SIZE if ((op.d != null) && (op.d.length > this.SPLIT_ON_DELETE_SIZE)) {
isBigDelete = true isBigDelete = true;
}
}
previousUpdateWasBigDelete = isBigDelete previousUpdateWasBigDelete = isBigDelete;
if shouldConcat if (shouldConcat) {
# check if the user in this update is already present in the earliest update, // check if the user in this update is already present in the earliest update,
# if not, add them to the users list of the earliest update // if not, add them to the users list of the earliest update
earliestUpdate.meta.user_ids = _.union earliestUpdate.meta.user_ids, [update.meta.user_id] earliestUpdate.meta.user_ids = _.union(earliestUpdate.meta.user_ids, [update.meta.user_id]);
doc_id = update.doc_id.toString() doc_id = update.doc_id.toString();
doc = earliestUpdate.docs[doc_id] const doc = earliestUpdate.docs[doc_id];
if doc? if (doc != null) {
doc.fromV = Math.min(doc.fromV, update.v) doc.fromV = Math.min(doc.fromV, update.v);
doc.toV = Math.max(doc.toV, update.v) doc.toV = Math.max(doc.toV, update.v);
else } else {
earliestUpdate.docs[doc_id] = earliestUpdate.docs[doc_id] = {
fromV: update.v fromV: update.v,
toV: update.v toV: update.v
};
}
earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts) earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts);
earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts) earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts);
else } else {
newUpdate = const newUpdate = {
meta: meta: {
user_ids: [] user_ids: [],
start_ts: update.meta.start_ts start_ts: update.meta.start_ts,
end_ts: update.meta.end_ts end_ts: update.meta.end_ts
},
docs: {} docs: {}
};
newUpdate.docs[update.doc_id.toString()] = newUpdate.docs[update.doc_id.toString()] = {
fromV: update.v fromV: update.v,
toV: update.v toV: update.v
newUpdate.meta.user_ids.push update.meta.user_id };
summarizedUpdates.push newUpdate newUpdate.meta.user_ids.push(update.meta.user_id);
summarizedUpdates.push(newUpdate);
}
}
return summarizedUpdates return summarizedUpdates;
}
});
function __guard__(value, transform) {
return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}
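// A worked example of the summarisation rules above (ids and timestamps are made
// up; updates are passed most recent first, as the comments note):
const updates = [
  { doc_id: "doc1", v: 8, op: [{ i: "world", p: 5 }],
    meta: { user_id: "507f1f77bcf86cd799439011", start_ts: 9 * 60 * 1000, end_ts: 9 * 60 * 1000 } },
  { doc_id: "doc1", v: 7, op: [{ i: "hello", p: 0 }],
    meta: { user_id: "507f1f77bcf86cd799439011", start_ts: 6 * 60 * 1000, end_ts: 6 * 60 * 1000 } }
];
// The older update starts 3 minutes before the newer block ends (under
// TIME_BETWEEN_DISTINCT_UPDATES) and neither op deletes more than
// SPLIT_ON_DELETE_SIZE characters, so both collapse into one summarized entry:
// UpdatesManager._summarizeUpdates(updates)
// => [{ meta: { user_ids: ["507f1f77bcf86cd799439011"], start_ts: 360000, end_ts: 540000 },
//       docs: { doc1: { fromV: 7, toV: 8 } } }]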


@ -1,69 +1,99 @@
request = require "requestretry" # allow retry on error https://github.com/FGRibreau/node-request-retry /*
logger = require "logger-sharelatex" * decaffeinate suggestions:
Settings = require "settings-sharelatex" * DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let WebApiManager;
const request = require("requestretry"); // allow retry on error https://github.com/FGRibreau/node-request-retry
const logger = require("logger-sharelatex");
const Settings = require("settings-sharelatex");
# Don't let HTTP calls hang for a long time // Don't let HTTP calls hang for a long time
MAX_HTTP_REQUEST_LENGTH = 15000 # 15 seconds const MAX_HTTP_REQUEST_LENGTH = 15000; // 15 seconds
# DEPRECATED! This method of getting user details via track-changes is deprecated // DEPRECATED! This method of getting user details via track-changes is deprecated
# in the way we lay out our services. // in the way we lay out our services.
# Instead, web should be responsible for collecting the raw data (user_ids) and // Instead, web should be responsible for collecting the raw data (user_ids) and
# filling it out with calls to other services. All API calls should create a // filling it out with calls to other services. All API calls should create a
# tree-like structure as much as possible, with web as the root. // tree-like structure as much as possible, with web as the root.
module.exports = WebApiManager = module.exports = (WebApiManager = {
sendRequest: (url, callback = (error, body) ->) -> sendRequest(url, callback) {
request.get { if (callback == null) { callback = function(error, body) {}; }
url: "#{Settings.apis.web.url}#{url}" return request.get({
timeout: MAX_HTTP_REQUEST_LENGTH url: `${Settings.apis.web.url}${url}`,
maxAttempts: 2 # for node-request-retry timeout: MAX_HTTP_REQUEST_LENGTH,
auth: maxAttempts: 2, // for node-request-retry
user: Settings.apis.web.user auth: {
pass: Settings.apis.web.pass user: Settings.apis.web.user,
pass: Settings.apis.web.pass,
sendImmediately: true sendImmediately: true
}, (error, res, body)-> }
if error? }, function(error, res, body){
return callback(error) if (error != null) {
if res.statusCode == 404 return callback(error);
logger.log url: url, "got 404 from web api" }
return callback null, null if (res.statusCode === 404) {
if res.statusCode >= 200 and res.statusCode < 300 logger.log({url}, "got 404 from web api");
return callback null, body return callback(null, null);
else }
error = new Error("web returned a non-success status code: #{res.statusCode} (attempts: #{res.attempts})") if ((res.statusCode >= 200) && (res.statusCode < 300)) {
callback error return callback(null, body);
} else {
error = new Error(`web returned a non-success status code: ${res.statusCode} (attempts: ${res.attempts})`);
return callback(error);
}
});
},
getUserInfo: (user_id, callback = (error, userInfo) ->) -> getUserInfo(user_id, callback) {
url = "/user/#{user_id}/personal_info" if (callback == null) { callback = function(error, userInfo) {}; }
logger.log user_id: user_id, "getting user info from web" const url = `/user/${user_id}/personal_info`;
WebApiManager.sendRequest url, (error, body) -> logger.log({user_id}, "getting user info from web");
if error? return WebApiManager.sendRequest(url, function(error, body) {
logger.error err: error, user_id: user_id, url: url, "error accessing web" let user;
return callback error if (error != null) {
logger.error({err: error, user_id, url}, "error accessing web");
if body == null return callback(error);
logger.error user_id: user_id, url: url, "no user found"
return callback null, null
try
user = JSON.parse(body)
catch error
return callback(error)
callback null, {
id: user.id
email: user.email
first_name: user.first_name
last_name: user.last_name
} }
getProjectDetails: (project_id, callback = (error, details) ->) -> if (body === null) {
url = "/project/#{project_id}/details" logger.error({user_id, url}, "no user found");
logger.log project_id: project_id, "getting project details from web" return callback(null, null);
WebApiManager.sendRequest url, (error, body) -> }
if error? try {
logger.error err: error, project_id: project_id, url: url, "error accessing web" user = JSON.parse(body);
return callback error } catch (error1) {
error = error1;
return callback(error);
}
return callback(null, {
id: user.id,
email: user.email,
first_name: user.first_name,
last_name: user.last_name
});
});
},
try getProjectDetails(project_id, callback) {
project = JSON.parse(body) if (callback == null) { callback = function(error, details) {}; }
catch error const url = `/project/${project_id}/details`;
return callback(error) logger.log({project_id}, "getting project details from web");
callback null, project return WebApiManager.sendRequest(url, function(error, body) {
let project;
if (error != null) {
logger.error({err: error, project_id, url}, "error accessing web");
return callback(error);
}
try {
project = JSON.parse(body);
} catch (error1) {
error = error1;
return callback(error);
}
return callback(null, project);
});
}
});
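// Illustrative call of getUserInfo as converted above (the id is a made-up
// ObjectId); a 404 from web yields a null userInfo rather than an error:
WebApiManager.getUserInfo("507f1f77bcf86cd799439011", function(error, userInfo) {
  if (error != null) { return console.error(error); }
  if (userInfo == null) { return console.log("no such user"); }
  return console.log(userInfo.email, userInfo.first_name, userInfo.last_name);
});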


@ -1,9 +1,10 @@
Settings = require "settings-sharelatex" const Settings = require("settings-sharelatex");
mongojs = require "mongojs" const mongojs = require("mongojs");
bson = require "bson" const bson = require("bson");
db = mongojs(Settings.mongo.url, ["docHistory", "projectHistoryMetaData", "docHistoryIndex"]) const db = mongojs(Settings.mongo.url, ["docHistory", "projectHistoryMetaData", "docHistoryIndex"]);
module.exports = module.exports = {
db: db db,
ObjectId: mongojs.ObjectId ObjectId: mongojs.ObjectId,
BSON: new bson.BSONPure() BSON: new bson.BSONPure()
};
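// Minimal usage sketch of the exported handle; the require path is a guess at this
// module's filename, and mongojs collections mirror the mongo shell API:
const { db, ObjectId } = require("./mongojs");
db.docHistory.find({ doc_id: ObjectId("507f1f77bcf86cd799439011") }, function(error, packs) {
  if (error != null) { return console.error(error); }
  return console.log("found", packs.length, "history packs");
});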