From 80375ae2dddc470cc98c5d721c524b19b770083c Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 16 Sep 2016 11:33:36 +0100 Subject: [PATCH 01/16] Run a diff against big delete - insert changes which are likely copy-pastes --- .../app/coffee/UpdateCompressor.coffee | 44 + .../track-changes/app/lib/diff_match_patch.js | 2193 +++++++++++++++++ .../UpdateCompressorTests.coffee | 41 + 3 files changed, 2278 insertions(+) create mode 100644 services/track-changes/app/lib/diff_match_patch.js diff --git a/services/track-changes/app/coffee/UpdateCompressor.coffee b/services/track-changes/app/coffee/UpdateCompressor.coffee index 98b2e048bf..f55d7462ea 100644 --- a/services/track-changes/app/coffee/UpdateCompressor.coffee +++ b/services/track-changes/app/coffee/UpdateCompressor.coffee @@ -1,6 +1,9 @@ strInject = (s1, pos, s2) -> s1[...pos] + s2 + s1[pos..] strRemove = (s1, pos, length) -> s1[...pos] + s1[(pos + length)..] +diff_match_patch = require("../lib/diff_match_patch").diff_match_patch +dmp = new diff_match_patch() + module.exports = UpdateCompressor = NOOP: "noop" @@ -155,6 +158,47 @@ module.exports = UpdateCompressor = # This will only happen if the delete extends outside the insert return [firstUpdate, secondUpdate] + # A delete then an insert at the same place, likely a copy-paste of a chunk of content + else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p + offset = firstOp.p + diff_ops = @diffAsShareJsOps(firstOp.d, secondOp.i) + return diff_ops.map (op) -> + op.p += offset + return { + meta: + start_ts: firstUpdate.meta.start_ts + end_ts: secondUpdate.meta.end_ts + user_id: firstUpdate.meta.user_id + op: op + v: secondUpdate.v + } + else return [firstUpdate, secondUpdate] + ADDED: 1 + REMOVED: -1 + UNCHANGED: 0 + diffAsShareJsOps: (before, after, callback = (error, ops) ->) -> + diffs = dmp.diff_main(before, after) + dmp.diff_cleanupSemantic(diffs) + + ops = [] + position = 0 + for diff in diffs + type = diff[0] + content = diff[1] + if type == @ADDED + ops.push + i: content + p: position + position += content.length + else if type == @REMOVED + ops.push + d: content + p: position + else if type == @UNCHANGED + position += content.length + else + throw "Unknown type" + return ops diff --git a/services/track-changes/app/lib/diff_match_patch.js b/services/track-changes/app/lib/diff_match_patch.js new file mode 100644 index 0000000000..112130e097 --- /dev/null +++ b/services/track-changes/app/lib/diff_match_patch.js @@ -0,0 +1,2193 @@ +/** + * Diff Match and Patch + * + * Copyright 2006 Google Inc. + * http://code.google.com/p/google-diff-match-patch/ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @fileoverview Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * @author fraser@google.com (Neil Fraser) + */ + +/** + * Class containing the diff, match and patch methods. + * @constructor + */ +function diff_match_patch() { + + // Defaults. + // Redefine these in your program to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + this.Diff_Timeout = 1.0; + // Cost of an empty edit operation in terms of edit characters. + this.Diff_EditCost = 4; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + this.Match_Threshold = 0.5; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + this.Match_Distance = 1000; + // When deleting a large block of text (over ~64 characters), how close do + // the contents have to be to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + this.Patch_DeleteThreshold = 0.5; + // Chunk size for context length. + this.Patch_Margin = 4; + + // The number of bits in an int. + this.Match_MaxBits = 32; +} + + +// DIFF FUNCTIONS + + +/** + * The data structure representing a diff is an array of tuples: + * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']] + * which means: delete 'Hello', add 'Goodbye' and keep ' world.' + */ +var DIFF_DELETE = -1; +var DIFF_INSERT = 1; +var DIFF_EQUAL = 0; + +/** @typedef {{0: number, 1: string}} */ +diff_match_patch.Diff; + + +/** + * Find the differences between two texts. Simplifies the problem by stripping + * any common prefix or suffix off the texts before diffing. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {boolean=} opt_checklines Optional speedup flag. If present and false, + * then don't run a line-level diff first to identify the changed areas. + * Defaults to true, which does a faster, slightly less optimal diff. + * @param {number} opt_deadline Optional time when the diff should be complete + * by. Used internally for recursive calls. Users should set DiffTimeout + * instead. + * @return {!Array.} Array of diff tuples. + */ +diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines, + opt_deadline) { + // Set a deadline by which time the diff must be complete. + if (typeof opt_deadline == 'undefined') { + if (this.Diff_Timeout <= 0) { + opt_deadline = Number.MAX_VALUE; + } else { + opt_deadline = (new Date).getTime() + this.Diff_Timeout * 1000; + } + } + var deadline = opt_deadline; + + // Check for null inputs. + if (text1 == null || text2 == null) { + throw new Error('Null input. (diff_main)'); + } + + // Check for equality (speedup). + if (text1 == text2) { + if (text1) { + return [[DIFF_EQUAL, text1]]; + } + return []; + } + + if (typeof opt_checklines == 'undefined') { + opt_checklines = true; + } + var checklines = opt_checklines; + + // Trim off common prefix (speedup). + var commonlength = this.diff_commonPrefix(text1, text2); + var commonprefix = text1.substring(0, commonlength); + text1 = text1.substring(commonlength); + text2 = text2.substring(commonlength); + + // Trim off common suffix (speedup). + commonlength = this.diff_commonSuffix(text1, text2); + var commonsuffix = text1.substring(text1.length - commonlength); + text1 = text1.substring(0, text1.length - commonlength); + text2 = text2.substring(0, text2.length - commonlength); + + // Compute the diff on the middle block. + var diffs = this.diff_compute_(text1, text2, checklines, deadline); + + // Restore the prefix and suffix. + if (commonprefix) { + diffs.unshift([DIFF_EQUAL, commonprefix]); + } + if (commonsuffix) { + diffs.push([DIFF_EQUAL, commonsuffix]); + } + this.diff_cleanupMerge(diffs); + return diffs; +}; + + +/** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {boolean} checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster, slightly less optimal diff. + * @param {number} deadline Time when the diff should be complete by. + * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines, + deadline) { + var diffs; + + if (!text1) { + // Just add some text (speedup). + return [[DIFF_INSERT, text2]]; + } + + if (!text2) { + // Just delete some text (speedup). + return [[DIFF_DELETE, text1]]; + } + + var longtext = text1.length > text2.length ? text1 : text2; + var shorttext = text1.length > text2.length ? text2 : text1; + var i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + diffs = [[DIFF_INSERT, longtext.substring(0, i)], + [DIFF_EQUAL, shorttext], + [DIFF_INSERT, longtext.substring(i + shorttext.length)]]; + // Swap insertions for deletions if diff is reversed. + if (text1.length > text2.length) { + diffs[0][0] = diffs[2][0] = DIFF_DELETE; + } + return diffs; + } + + if (shorttext.length == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]]; + } + + // Check to see if the problem can be split in two. + var hm = this.diff_halfMatch_(text1, text2); + if (hm) { + // A half-match was found, sort out the return data. + var text1_a = hm[0]; + var text1_b = hm[1]; + var text2_a = hm[2]; + var text2_b = hm[3]; + var mid_common = hm[4]; + // Send both pairs off for separate processing. + var diffs_a = this.diff_main(text1_a, text2_a, checklines, deadline); + var diffs_b = this.diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. + return diffs_a.concat([[DIFF_EQUAL, mid_common]], diffs_b); + } + + if (checklines && text1.length > 100 && text2.length > 100) { + return this.diff_lineMode_(text1, text2, deadline); + } + + return this.diff_bisect_(text1, text2, deadline); +}; + + +/** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} deadline Time when the diff should be complete by. + * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_lineMode_ = function(text1, text2, deadline) { + // Scan the text on a line-by-line basis first. + var a = this.diff_linesToChars_(text1, text2); + text1 = a.chars1; + text2 = a.chars2; + var linearray = a.lineArray; + + var diffs = this.diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + this.diff_charsToLines_(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + this.diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.push([DIFF_EQUAL, '']); + var pointer = 0; + var count_delete = 0; + var count_insert = 0; + var text_delete = ''; + var text_insert = ''; + while (pointer < diffs.length) { + switch (diffs[pointer][0]) { + case DIFF_INSERT: + count_insert++; + text_insert += diffs[pointer][1]; + break; + case DIFF_DELETE: + count_delete++; + text_delete += diffs[pointer][1]; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + diffs.splice(pointer - count_delete - count_insert, + count_delete + count_insert); + pointer = pointer - count_delete - count_insert; + var a = this.diff_main(text_delete, text_insert, false, deadline); + for (var j = a.length - 1; j >= 0; j--) { + diffs.splice(pointer, 0, a[j]); + } + pointer = pointer + a.length; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + pointer++; + } + diffs.pop(); // Remove the dummy entry at the end. + + return diffs; +}; + + +/** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} deadline Time at which to bail if not yet complete. + * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) { + // Cache the text lengths to prevent multiple calls. + var text1_length = text1.length; + var text2_length = text2.length; + var max_d = Math.ceil((text1_length + text2_length) / 2); + var v_offset = max_d; + var v_length = 2 * max_d; + var v1 = new Array(v_length); + var v2 = new Array(v_length); + // Setting all elements to -1 is faster in Chrome & Firefox than mixing + // integers and undefined. + for (var x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + var delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will collide + // with the reverse path. + var front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + var k1start = 0; + var k1end = 0; + var k2start = 0; + var k2end = 0; + for (var d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if ((new Date()).getTime() > deadline) { + break; + } + + // Walk the front path one step. + for (var k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + var k1_offset = v_offset + k1; + var x1; + if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + var y1 = x1 - k1; + while (x1 < text1_length && y1 < text2_length && + text1.charAt(x1) == text2.charAt(y1)) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + var k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + var x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + return this.diff_bisectSplit_(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. + for (var k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + var k2_offset = v_offset + k2; + var x2; + if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + var y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length && + text1.charAt(text1_length - x2 - 1) == + text2.charAt(text2_length - y2 - 1)) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + var k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + var x1 = v1[k1_offset]; + var y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected. + return this.diff_bisectSplit_(text1, text2, x1, y1, deadline); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]]; +}; + + +/** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} x Index of split point in text1. + * @param {number} y Index of split point in text2. + * @param {number} deadline Time at which to bail if not yet complete. + * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_bisectSplit_ = function(text1, text2, x, y, + deadline) { + var text1a = text1.substring(0, x); + var text2a = text2.substring(0, y); + var text1b = text1.substring(x); + var text2b = text2.substring(y); + + // Compute both diffs serially. + var diffs = this.diff_main(text1a, text2a, false, deadline); + var diffsb = this.diff_main(text1b, text2b, false, deadline); + + return diffs.concat(diffsb); +}; + + +/** + * Split two texts into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {{chars1: string, chars2: string, lineArray: !Array.}} + * An object containing the encoded text1, the encoded text2 and + * the array of unique strings. + * The zeroth element of the array of unique strings is intentionally blank. + * @private + */ +diff_match_patch.prototype.diff_linesToChars_ = function(text1, text2) { + var lineArray = []; // e.g. lineArray[4] == 'Hello\n' + var lineHash = {}; // e.g. lineHash['Hello\n'] == 4 + + // '\x00' is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray[0] = ''; + + /** + * Split a text into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * Modifies linearray and linehash through being a closure. + * @param {string} text String to encode. + * @return {string} Encoded string. + * @private + */ + function diff_linesToCharsMunge_(text) { + var chars = ''; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + var lineStart = 0; + var lineEnd = -1; + // Keeping our own length variable is faster than looking it up. + var lineArrayLength = lineArray.length; + while (lineEnd < text.length - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length - 1; + } + var line = text.substring(lineStart, lineEnd + 1); + lineStart = lineEnd + 1; + + if (lineHash.hasOwnProperty ? lineHash.hasOwnProperty(line) : + (lineHash[line] !== undefined)) { + chars += String.fromCharCode(lineHash[line]); + } else { + chars += String.fromCharCode(lineArrayLength); + lineHash[line] = lineArrayLength; + lineArray[lineArrayLength++] = line; + } + } + return chars; + } + + var chars1 = diff_linesToCharsMunge_(text1); + var chars2 = diff_linesToCharsMunge_(text2); + return {chars1: chars1, chars2: chars2, lineArray: lineArray}; +}; + + +/** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param {!Array.} diffs Array of diff tuples. + * @param {!Array.} lineArray Array of unique strings. + * @private + */ +diff_match_patch.prototype.diff_charsToLines_ = function(diffs, lineArray) { + for (var x = 0; x < diffs.length; x++) { + var chars = diffs[x][1]; + var text = []; + for (var y = 0; y < chars.length; y++) { + text[y] = lineArray[chars.charCodeAt(y)]; + } + diffs[x][1] = text.join(''); + } +}; + + +/** + * Determine the common prefix of two strings. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the start of each + * string. + */ +diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) { + // Quick check for common null cases. + if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) { + return 0; + } + // Binary search. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + var pointermin = 0; + var pointermax = Math.min(text1.length, text2.length); + var pointermid = pointermax; + var pointerstart = 0; + while (pointermin < pointermid) { + if (text1.substring(pointerstart, pointermid) == + text2.substring(pointerstart, pointermid)) { + pointermin = pointermid; + pointerstart = pointermin; + } else { + pointermax = pointermid; + } + pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); + } + return pointermid; +}; + + +/** + * Determine the common suffix of two strings. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the end of each string. + */ +diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) { + // Quick check for common null cases. + if (!text1 || !text2 || + text1.charAt(text1.length - 1) != text2.charAt(text2.length - 1)) { + return 0; + } + // Binary search. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + var pointermin = 0; + var pointermax = Math.min(text1.length, text2.length); + var pointermid = pointermax; + var pointerend = 0; + while (pointermin < pointermid) { + if (text1.substring(text1.length - pointermid, text1.length - pointerend) == + text2.substring(text2.length - pointermid, text2.length - pointerend)) { + pointermin = pointermid; + pointerend = pointermin; + } else { + pointermax = pointermid; + } + pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); + } + return pointermid; +}; + + +/** + * Determine if the suffix of one string is the prefix of another. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the end of the first + * string and the start of the second string. + * @private + */ +diff_match_patch.prototype.diff_commonOverlap_ = function(text1, text2) { + // Cache the text lengths to prevent multiple calls. + var text1_length = text1.length; + var text2_length = text2.length; + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + if (text1_length > text2_length) { + text1 = text1.substring(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2 = text2.substring(0, text1_length); + } + var text_length = Math.min(text1_length, text2_length); + // Quick check for the worst case. + if (text1 == text2) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + var best = 0; + var length = 1; + while (true) { + var pattern = text1.substring(text_length - length); + var found = text2.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1.substring(text_length - length) == + text2.substring(0, length)) { + best = length; + length++; + } + } +}; + + +/** + * Do the two texts share a substring which is at least half the length of the + * longer text? + * This speedup can produce non-minimal diffs. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {Array.} Five element Array, containing the prefix of + * text1, the suffix of text1, the prefix of text2, the suffix of + * text2 and the common middle. Or null if there was no match. + * @private + */ +diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) { + if (this.Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return null; + } + var longtext = text1.length > text2.length ? text1 : text2; + var shorttext = text1.length > text2.length ? text2 : text1; + if (longtext.length < 4 || shorttext.length * 2 < longtext.length) { + return null; // Pointless. + } + var dmp = this; // 'this' becomes 'window' in a closure. + + /** + * Does a substring of shorttext exist within longtext such that the substring + * is at least half the length of longtext? + * Closure, but does not reference any external variables. + * @param {string} longtext Longer string. + * @param {string} shorttext Shorter string. + * @param {number} i Start index of quarter length substring within longtext. + * @return {Array.} Five element Array, containing the prefix of + * longtext, the suffix of longtext, the prefix of shorttext, the suffix + * of shorttext and the common middle. Or null if there was no match. + * @private + */ + function diff_halfMatchI_(longtext, shorttext, i) { + // Start with a 1/4 length substring at position i as a seed. + var seed = longtext.substring(i, i + Math.floor(longtext.length / 4)); + var j = -1; + var best_common = ''; + var best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + var prefixLength = dmp.diff_commonPrefix(longtext.substring(i), + shorttext.substring(j)); + var suffixLength = dmp.diff_commonSuffix(longtext.substring(0, i), + shorttext.substring(0, j)); + if (best_common.length < suffixLength + prefixLength) { + best_common = shorttext.substring(j - suffixLength, j) + + shorttext.substring(j, j + prefixLength); + best_longtext_a = longtext.substring(0, i - suffixLength); + best_longtext_b = longtext.substring(i + prefixLength); + best_shorttext_a = shorttext.substring(0, j - suffixLength); + best_shorttext_b = shorttext.substring(j + prefixLength); + } + } + if (best_common.length * 2 >= longtext.length) { + return [best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common]; + } else { + return null; + } + } + + // First check if the second quarter is the seed for a half-match. + var hm1 = diff_halfMatchI_(longtext, shorttext, + Math.ceil(longtext.length / 4)); + // Check again based on the third quarter. + var hm2 = diff_halfMatchI_(longtext, shorttext, + Math.ceil(longtext.length / 2)); + var hm; + if (!hm1 && !hm2) { + return null; + } else if (!hm2) { + hm = hm1; + } else if (!hm1) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length > hm2[4].length ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. + var text1_a, text1_b, text2_a, text2_b; + if (text1.length > text2.length) { + text1_a = hm[0]; + text1_b = hm[1]; + text2_a = hm[2]; + text2_b = hm[3]; + } else { + text2_a = hm[0]; + text2_b = hm[1]; + text1_a = hm[2]; + text1_b = hm[3]; + } + var mid_common = hm[4]; + return [text1_a, text1_b, text2_a, text2_b, mid_common]; +}; + + +/** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param {!Array.} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupSemantic = function(diffs) { + var changes = false; + var equalities = []; // Stack of indices where equalities are found. + var equalitiesLength = 0; // Keeping our own length var is faster in JS. + /** @type {?string} */ + var lastequality = null; + // Always equal to diffs[equalities[equalitiesLength - 1]][1] + var pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. + var length_insertions1 = 0; + var length_deletions1 = 0; + // Number of characters that changed after the equality. + var length_insertions2 = 0; + var length_deletions2 = 0; + while (pointer < diffs.length) { + if (diffs[pointer][0] == DIFF_EQUAL) { // Equality found. + equalities[equalitiesLength++] = pointer; + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = diffs[pointer][1]; + } else { // An insertion or deletion. + if (diffs[pointer][0] == DIFF_INSERT) { + length_insertions2 += diffs[pointer][1].length; + } else { + length_deletions2 += diffs[pointer][1].length; + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (lastequality && (lastequality.length <= + Math.max(length_insertions1, length_deletions1)) && + (lastequality.length <= Math.max(length_insertions2, + length_deletions2))) { + // Duplicate record. + diffs.splice(equalities[equalitiesLength - 1], 0, + [DIFF_DELETE, lastequality]); + // Change second copy to insert. + diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT; + // Throw away the equality we just deleted. + equalitiesLength--; + // Throw away the previous equality (it needs to be reevaluated). + equalitiesLength--; + pointer = equalitiesLength > 0 ? equalities[equalitiesLength - 1] : -1; + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = null; + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if (changes) { + this.diff_cleanupMerge(diffs); + } + this.diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1; + while (pointer < diffs.length) { + if (diffs[pointer - 1][0] == DIFF_DELETE && + diffs[pointer][0] == DIFF_INSERT) { + var deletion = diffs[pointer - 1][1]; + var insertion = diffs[pointer][1]; + var overlap_length1 = this.diff_commonOverlap_(deletion, insertion); + var overlap_length2 = this.diff_commonOverlap_(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length / 2 || + overlap_length1 >= insertion.length / 2) { + // Overlap found. Insert an equality and trim the surrounding edits. + diffs.splice(pointer, 0, + [DIFF_EQUAL, insertion.substring(0, overlap_length1)]); + diffs[pointer - 1][1] = + deletion.substring(0, deletion.length - overlap_length1); + diffs[pointer + 1][1] = insertion.substring(overlap_length1); + pointer++; + } + } else { + if (overlap_length2 >= deletion.length / 2 || + overlap_length2 >= insertion.length / 2) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + diffs.splice(pointer, 0, + [DIFF_EQUAL, deletion.substring(0, overlap_length2)]); + diffs[pointer - 1][0] = DIFF_INSERT; + diffs[pointer - 1][1] = + insertion.substring(0, insertion.length - overlap_length2); + diffs[pointer + 1][0] = DIFF_DELETE; + diffs[pointer + 1][1] = + deletion.substring(overlap_length2); + pointer++; + } + } + pointer++; + } + pointer++; + } +}; + + +/** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param {!Array.} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) { + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * Closure, but does not reference any external variables. + * @param {string} one First string. + * @param {string} two Second string. + * @return {number} The score. + * @private + */ + function diff_cleanupSemanticScore_(one, two) { + if (!one || !two) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + var char1 = one.charAt(one.length - 1); + var char2 = two.charAt(0); + var nonAlphaNumeric1 = char1.match(diff_match_patch.nonAlphaNumericRegex_); + var nonAlphaNumeric2 = char2.match(diff_match_patch.nonAlphaNumericRegex_); + var whitespace1 = nonAlphaNumeric1 && + char1.match(diff_match_patch.whitespaceRegex_); + var whitespace2 = nonAlphaNumeric2 && + char2.match(diff_match_patch.whitespaceRegex_); + var lineBreak1 = whitespace1 && + char1.match(diff_match_patch.linebreakRegex_); + var lineBreak2 = whitespace2 && + char2.match(diff_match_patch.linebreakRegex_); + var blankLine1 = lineBreak1 && + one.match(diff_match_patch.blanklineEndRegex_); + var blankLine2 = lineBreak2 && + two.match(diff_match_patch.blanklineStartRegex_); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; + } + + var pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diffs.length - 1) { + if (diffs[pointer - 1][0] == DIFF_EQUAL && + diffs[pointer + 1][0] == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + var equality1 = diffs[pointer - 1][1]; + var edit = diffs[pointer][1]; + var equality2 = diffs[pointer + 1][1]; + + // First, shift the edit as far left as possible. + var commonOffset = this.diff_commonSuffix(equality1, edit); + if (commonOffset) { + var commonString = edit.substring(edit.length - commonOffset); + equality1 = equality1.substring(0, equality1.length - commonOffset); + edit = commonString + edit.substring(0, edit.length - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, looking for the best fit. + var bestEquality1 = equality1; + var bestEdit = edit; + var bestEquality2 = equality2; + var bestScore = diff_cleanupSemanticScore_(equality1, edit) + + diff_cleanupSemanticScore_(edit, equality2); + while (edit.charAt(0) === equality2.charAt(0)) { + equality1 += edit.charAt(0); + edit = edit.substring(1) + equality2.charAt(0); + equality2 = equality2.substring(1); + var score = diff_cleanupSemanticScore_(equality1, edit) + + diff_cleanupSemanticScore_(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (diffs[pointer - 1][1] != bestEquality1) { + // We have an improvement, save it back to the diff. + if (bestEquality1) { + diffs[pointer - 1][1] = bestEquality1; + } else { + diffs.splice(pointer - 1, 1); + pointer--; + } + diffs[pointer][1] = bestEdit; + if (bestEquality2) { + diffs[pointer + 1][1] = bestEquality2; + } else { + diffs.splice(pointer + 1, 1); + pointer--; + } + } + } + pointer++; + } +}; + +// Define some regex patterns for matching boundaries. +diff_match_patch.nonAlphaNumericRegex_ = /[^a-zA-Z0-9]/; +diff_match_patch.whitespaceRegex_ = /\s/; +diff_match_patch.linebreakRegex_ = /[\r\n]/; +diff_match_patch.blanklineEndRegex_ = /\n\r?\n$/; +diff_match_patch.blanklineStartRegex_ = /^\r?\n\r?\n/; + +/** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param {!Array.} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupEfficiency = function(diffs) { + var changes = false; + var equalities = []; // Stack of indices where equalities are found. + var equalitiesLength = 0; // Keeping our own length var is faster in JS. + /** @type {?string} */ + var lastequality = null; + // Always equal to diffs[equalities[equalitiesLength - 1]][1] + var pointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + var pre_ins = false; + // Is there a deletion operation before the last equality. + var pre_del = false; + // Is there an insertion operation after the last equality. + var post_ins = false; + // Is there a deletion operation after the last equality. + var post_del = false; + while (pointer < diffs.length) { + if (diffs[pointer][0] == DIFF_EQUAL) { // Equality found. + if (diffs[pointer][1].length < this.Diff_EditCost && + (post_ins || post_del)) { + // Candidate found. + equalities[equalitiesLength++] = pointer; + pre_ins = post_ins; + pre_del = post_del; + lastequality = diffs[pointer][1]; + } else { + // Not a candidate, and can never become one. + equalitiesLength = 0; + lastequality = null; + } + post_ins = post_del = false; + } else { // An insertion or deletion. + if (diffs[pointer][0] == DIFF_DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if (lastequality && ((pre_ins && pre_del && post_ins && post_del) || + ((lastequality.length < this.Diff_EditCost / 2) && + (pre_ins + pre_del + post_ins + post_del) == 3))) { + // Duplicate record. + diffs.splice(equalities[equalitiesLength - 1], 0, + [DIFF_DELETE, lastequality]); + // Change second copy to insert. + diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT; + equalitiesLength--; // Throw away the equality we just deleted; + lastequality = null; + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalitiesLength = 0; + } else { + equalitiesLength--; // Throw away the previous equality. + pointer = equalitiesLength > 0 ? + equalities[equalitiesLength - 1] : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if (changes) { + this.diff_cleanupMerge(diffs); + } +}; + + +/** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param {!Array.} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupMerge = function(diffs) { + diffs.push([DIFF_EQUAL, '']); // Add a dummy entry at the end. + var pointer = 0; + var count_delete = 0; + var count_insert = 0; + var text_delete = ''; + var text_insert = ''; + var commonlength; + while (pointer < diffs.length) { + switch (diffs[pointer][0]) { + case DIFF_INSERT: + count_insert++; + text_insert += diffs[pointer][1]; + pointer++; + break; + case DIFF_DELETE: + count_delete++; + text_delete += diffs[pointer][1]; + pointer++; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete + count_insert > 1) { + if (count_delete !== 0 && count_insert !== 0) { + // Factor out any common prefixies. + commonlength = this.diff_commonPrefix(text_insert, text_delete); + if (commonlength !== 0) { + if ((pointer - count_delete - count_insert) > 0 && + diffs[pointer - count_delete - count_insert - 1][0] == + DIFF_EQUAL) { + diffs[pointer - count_delete - count_insert - 1][1] += + text_insert.substring(0, commonlength); + } else { + diffs.splice(0, 0, [DIFF_EQUAL, + text_insert.substring(0, commonlength)]); + pointer++; + } + text_insert = text_insert.substring(commonlength); + text_delete = text_delete.substring(commonlength); + } + // Factor out any common suffixies. + commonlength = this.diff_commonSuffix(text_insert, text_delete); + if (commonlength !== 0) { + diffs[pointer][1] = text_insert.substring(text_insert.length - + commonlength) + diffs[pointer][1]; + text_insert = text_insert.substring(0, text_insert.length - + commonlength); + text_delete = text_delete.substring(0, text_delete.length - + commonlength); + } + } + // Delete the offending records and add the merged ones. + if (count_delete === 0) { + diffs.splice(pointer - count_insert, + count_delete + count_insert, [DIFF_INSERT, text_insert]); + } else if (count_insert === 0) { + diffs.splice(pointer - count_delete, + count_delete + count_insert, [DIFF_DELETE, text_delete]); + } else { + diffs.splice(pointer - count_delete - count_insert, + count_delete + count_insert, [DIFF_DELETE, text_delete], + [DIFF_INSERT, text_insert]); + } + pointer = pointer - count_delete - count_insert + + (count_delete ? 1 : 0) + (count_insert ? 1 : 0) + 1; + } else if (pointer !== 0 && diffs[pointer - 1][0] == DIFF_EQUAL) { + // Merge this equality with the previous one. + diffs[pointer - 1][1] += diffs[pointer][1]; + diffs.splice(pointer, 1); + } else { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + } + if (diffs[diffs.length - 1][1] === '') { + diffs.pop(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by equalities + // which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + var changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diffs.length - 1) { + if (diffs[pointer - 1][0] == DIFF_EQUAL && + diffs[pointer + 1][0] == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + if (diffs[pointer][1].substring(diffs[pointer][1].length - + diffs[pointer - 1][1].length) == diffs[pointer - 1][1]) { + // Shift the edit over the previous equality. + diffs[pointer][1] = diffs[pointer - 1][1] + + diffs[pointer][1].substring(0, diffs[pointer][1].length - + diffs[pointer - 1][1].length); + diffs[pointer + 1][1] = diffs[pointer - 1][1] + diffs[pointer + 1][1]; + diffs.splice(pointer - 1, 1); + changes = true; + } else if (diffs[pointer][1].substring(0, diffs[pointer + 1][1].length) == + diffs[pointer + 1][1]) { + // Shift the edit over the next equality. + diffs[pointer - 1][1] += diffs[pointer + 1][1]; + diffs[pointer][1] = + diffs[pointer][1].substring(diffs[pointer + 1][1].length) + + diffs[pointer + 1][1]; + diffs.splice(pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + this.diff_cleanupMerge(diffs); + } +}; + + +/** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. 'The cat' vs 'The big cat', 1->1, 5->8 + * @param {!Array.} diffs Array of diff tuples. + * @param {number} loc Location within text1. + * @return {number} Location within text2. + */ +diff_match_patch.prototype.diff_xIndex = function(diffs, loc) { + var chars1 = 0; + var chars2 = 0; + var last_chars1 = 0; + var last_chars2 = 0; + var x; + for (x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_INSERT) { // Equality or deletion. + chars1 += diffs[x][1].length; + } + if (diffs[x][0] !== DIFF_DELETE) { // Equality or insertion. + chars2 += diffs[x][1].length; + } + if (chars1 > loc) { // Overshot the location. + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + // Was the location was deleted? + if (diffs.length != x && diffs[x][0] === DIFF_DELETE) { + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +}; + + +/** + * Convert a diff array into a pretty HTML report. + * @param {!Array.} diffs Array of diff tuples. + * @return {string} HTML representation. + */ +diff_match_patch.prototype.diff_prettyHtml = function(diffs) { + var html = []; + var pattern_amp = /&/g; + var pattern_lt = //g; + var pattern_para = /\n/g; + for (var x = 0; x < diffs.length; x++) { + var op = diffs[x][0]; // Operation (insert, delete, equal) + var data = diffs[x][1]; // Text of change. + var text = data.replace(pattern_amp, '&').replace(pattern_lt, '<') + .replace(pattern_gt, '>').replace(pattern_para, '¶
'); + switch (op) { + case DIFF_INSERT: + html[x] = '' + text + ''; + break; + case DIFF_DELETE: + html[x] = '' + text + ''; + break; + case DIFF_EQUAL: + html[x] = '' + text + ''; + break; + } + } + return html.join(''); +}; + + +/** + * Compute and return the source text (all equalities and deletions). + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Source text. + */ +diff_match_patch.prototype.diff_text1 = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_INSERT) { + text[x] = diffs[x][1]; + } + } + return text.join(''); +}; + + +/** + * Compute and return the destination text (all equalities and insertions). + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Destination text. + */ +diff_match_patch.prototype.diff_text2 = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_DELETE) { + text[x] = diffs[x][1]; + } + } + return text.join(''); +}; + + +/** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param {!Array.} diffs Array of diff tuples. + * @return {number} Number of changes. + */ +diff_match_patch.prototype.diff_levenshtein = function(diffs) { + var levenshtein = 0; + var insertions = 0; + var deletions = 0; + for (var x = 0; x < diffs.length; x++) { + var op = diffs[x][0]; + var data = diffs[x][1]; + switch (op) { + case DIFF_INSERT: + insertions += data.length; + break; + case DIFF_DELETE: + deletions += data.length; + break; + case DIFF_EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += Math.max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += Math.max(insertions, deletions); + return levenshtein; +}; + + +/** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Delta text. + */ +diff_match_patch.prototype.diff_toDelta = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + switch (diffs[x][0]) { + case DIFF_INSERT: + text[x] = '+' + encodeURI(diffs[x][1]); + break; + case DIFF_DELETE: + text[x] = '-' + diffs[x][1].length; + break; + case DIFF_EQUAL: + text[x] = '=' + diffs[x][1].length; + break; + } + } + return text.join('\t').replace(/%20/g, ' '); +}; + + +/** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param {string} text1 Source string for the diff. + * @param {string} delta Delta text. + * @return {!Array.} Array of diff tuples. + * @throws {!Error} If invalid input. + */ +diff_match_patch.prototype.diff_fromDelta = function(text1, delta) { + var diffs = []; + var diffsLength = 0; // Keeping our own length var is faster in JS. + var pointer = 0; // Cursor in text1 + var tokens = delta.split(/\t/g); + for (var x = 0; x < tokens.length; x++) { + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + var param = tokens[x].substring(1); + switch (tokens[x].charAt(0)) { + case '+': + try { + diffs[diffsLength++] = [DIFF_INSERT, decodeURI(param)]; + } catch (ex) { + // Malformed URI sequence. + throw new Error('Illegal escape in diff_fromDelta: ' + param); + } + break; + case '-': + // Fall through. + case '=': + var n = parseInt(param, 10); + if (isNaN(n) || n < 0) { + throw new Error('Invalid number in diff_fromDelta: ' + param); + } + var text = text1.substring(pointer, pointer += n); + if (tokens[x].charAt(0) == '=') { + diffs[diffsLength++] = [DIFF_EQUAL, text]; + } else { + diffs[diffsLength++] = [DIFF_DELETE, text]; + } + break; + default: + // Blank tokens are ok (from a trailing \t). + // Anything else is an error. + if (tokens[x]) { + throw new Error('Invalid diff operation in diff_fromDelta: ' + + tokens[x]); + } + } + } + if (pointer != text1.length) { + throw new Error('Delta length (' + pointer + + ') does not equal source text length (' + text1.length + ').'); + } + return diffs; +}; + + +// MATCH FUNCTIONS + + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * @param {string} text The text to search. + * @param {string} pattern The pattern to search for. + * @param {number} loc The location to search around. + * @return {number} Best match index or -1. + */ +diff_match_patch.prototype.match_main = function(text, pattern, loc) { + // Check for null inputs. + if (text == null || pattern == null || loc == null) { + throw new Error('Null input. (match_main)'); + } + + loc = Math.max(0, Math.min(loc, text.length)); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (!text.length) { + // Nothing to match. + return -1; + } else if (text.substring(loc, loc + pattern.length) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return this.match_bitap_(text, pattern, loc); + } +}; + + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. + * @param {string} text The text to search. + * @param {string} pattern The pattern to search for. + * @param {number} loc The location to search around. + * @return {number} Best match index or -1. + * @private + */ +diff_match_patch.prototype.match_bitap_ = function(text, pattern, loc) { + if (pattern.length > this.Match_MaxBits) { + throw new Error('Pattern too long for this browser.'); + } + + // Initialise the alphabet. + var s = this.match_alphabet_(pattern); + + var dmp = this; // 'this' becomes 'window' in a closure. + + /** + * Compute and return the score for a match with e errors and x location. + * Accesses loc and pattern through being a closure. + * @param {number} e Number of errors in match. + * @param {number} x Location of match. + * @return {number} Overall score for match (0.0 = good, 1.0 = bad). + * @private + */ + function match_bitapScore_(e, x) { + var accuracy = e / pattern.length; + var proximity = Math.abs(loc - x); + if (!dmp.Match_Distance) { + // Dodge divide by zero error. + return proximity ? 1.0 : accuracy; + } + return accuracy + (proximity / dmp.Match_Distance); + } + + // Highest score beyond which we give up. + var score_threshold = this.Match_Threshold; + // Is there a nearby exact match? (speedup) + var best_loc = text.indexOf(pattern, loc); + if (best_loc != -1) { + score_threshold = Math.min(match_bitapScore_(0, best_loc), score_threshold); + // What about in the other direction? (speedup) + best_loc = text.lastIndexOf(pattern, loc + pattern.length); + if (best_loc != -1) { + score_threshold = + Math.min(match_bitapScore_(0, best_loc), score_threshold); + } + } + + // Initialise the bit arrays. + var matchmask = 1 << (pattern.length - 1); + best_loc = -1; + + var bin_min, bin_mid; + var bin_max = pattern.length + text.length; + var last_rd; + for (var d = 0; d < pattern.length; d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at this + // error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore_(d, loc + bin_mid) <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = Math.floor((bin_max - bin_min) / 2 + bin_min); + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + var start = Math.max(1, loc - bin_mid + 1); + var finish = Math.min(loc + bin_mid, text.length) + pattern.length; + + var rd = Array(finish + 2); + rd[finish + 1] = (1 << d) - 1; + for (var j = finish; j >= start; j--) { + // The alphabet (s) is a sparse hash, so the following line generates + // warnings. + var charMatch = s[text.charAt(j - 1)]; + if (d === 0) { // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { // Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | + (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | + last_rd[j + 1]; + } + if (rd[j] & matchmask) { + var score = match_bitapScore_(d, j - 1); + // This match will almost certainly be better than any existing match. + // But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = Math.max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + // No hope for a (better) match at greater error levels. + if (match_bitapScore_(d + 1, loc) > score_threshold) { + break; + } + last_rd = rd; + } + return best_loc; +}; + + +/** + * Initialise the alphabet for the Bitap algorithm. + * @param {string} pattern The text to encode. + * @return {!Object} Hash of character locations. + * @private + */ +diff_match_patch.prototype.match_alphabet_ = function(pattern) { + var s = {}; + for (var i = 0; i < pattern.length; i++) { + s[pattern.charAt(i)] = 0; + } + for (var i = 0; i < pattern.length; i++) { + s[pattern.charAt(i)] |= 1 << (pattern.length - i - 1); + } + return s; +}; + + +// PATCH FUNCTIONS + + +/** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param {!diff_match_patch.patch_obj} patch The patch to grow. + * @param {string} text Source text. + * @private + */ +diff_match_patch.prototype.patch_addContext_ = function(patch, text) { + if (text.length == 0) { + return; + } + var pattern = text.substring(patch.start2, patch.start2 + patch.length1); + var padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while (text.indexOf(pattern) != text.lastIndexOf(pattern) && + pattern.length < this.Match_MaxBits - this.Patch_Margin - + this.Patch_Margin) { + padding += this.Patch_Margin; + pattern = text.substring(patch.start2 - padding, + patch.start2 + patch.length1 + padding); + } + // Add one chunk for good luck. + padding += this.Patch_Margin; + + // Add the prefix. + var prefix = text.substring(patch.start2 - padding, patch.start2); + if (prefix) { + patch.diffs.unshift([DIFF_EQUAL, prefix]); + } + // Add the suffix. + var suffix = text.substring(patch.start2 + patch.length1, + patch.start2 + patch.length1 + padding); + if (suffix) { + patch.diffs.push([DIFF_EQUAL, suffix]); + } + + // Roll back the start points. + patch.start1 -= prefix.length; + patch.start2 -= prefix.length; + // Extend the lengths. + patch.length1 += prefix.length + suffix.length; + patch.length2 += prefix.length + suffix.length; +}; + + +/** + * Compute a list of patches to turn text1 into text2. + * Use diffs if provided, otherwise compute it ourselves. + * There are four ways to call this function, depending on what data is + * available to the caller: + * Method 1: + * a = text1, b = text2 + * Method 2: + * a = diffs + * Method 3 (optimal): + * a = text1, b = diffs + * Method 4 (deprecated, use method 3): + * a = text1, b = text2, c = diffs + * + * @param {string|!Array.} a text1 (methods 1,3,4) or + * Array of diff tuples for text1 to text2 (method 2). + * @param {string|!Array.} opt_b text2 (methods 1,4) or + * Array of diff tuples for text1 to text2 (method 3) or undefined (method 2). + * @param {string|!Array.} opt_c Array of diff tuples + * for text1 to text2 (method 4) or undefined (methods 1,2,3). + * @return {!Array.} Array of Patch objects. + */ +diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) { + var text1, diffs; + if (typeof a == 'string' && typeof opt_b == 'string' && + typeof opt_c == 'undefined') { + // Method 1: text1, text2 + // Compute diffs from text1 and text2. + text1 = /** @type {string} */(a); + diffs = this.diff_main(text1, /** @type {string} */(opt_b), true); + if (diffs.length > 2) { + this.diff_cleanupSemantic(diffs); + this.diff_cleanupEfficiency(diffs); + } + } else if (a && typeof a == 'object' && typeof opt_b == 'undefined' && + typeof opt_c == 'undefined') { + // Method 2: diffs + // Compute text1 from diffs. + diffs = /** @type {!Array.} */(a); + text1 = this.diff_text1(diffs); + } else if (typeof a == 'string' && opt_b && typeof opt_b == 'object' && + typeof opt_c == 'undefined') { + // Method 3: text1, diffs + text1 = /** @type {string} */(a); + diffs = /** @type {!Array.} */(opt_b); + } else if (typeof a == 'string' && typeof opt_b == 'string' && + opt_c && typeof opt_c == 'object') { + // Method 4: text1, text2, diffs + // text2 is not used. + text1 = /** @type {string} */(a); + diffs = /** @type {!Array.} */(opt_c); + } else { + throw new Error('Unknown call format to patch_make.'); + } + + if (diffs.length === 0) { + return []; // Get rid of the null case. + } + var patches = []; + var patch = new diff_match_patch.patch_obj(); + var patchDiffLength = 0; // Keeping our own length var is faster in JS. + var char_count1 = 0; // Number of characters into the text1 string. + var char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + var prepatch_text = text1; + var postpatch_text = text1; + for (var x = 0; x < diffs.length; x++) { + var diff_type = diffs[x][0]; + var diff_text = diffs[x][1]; + + if (!patchDiffLength && diff_type !== DIFF_EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (diff_type) { + case DIFF_INSERT: + patch.diffs[patchDiffLength++] = diffs[x]; + patch.length2 += diff_text.length; + postpatch_text = postpatch_text.substring(0, char_count2) + diff_text + + postpatch_text.substring(char_count2); + break; + case DIFF_DELETE: + patch.length1 += diff_text.length; + patch.diffs[patchDiffLength++] = diffs[x]; + postpatch_text = postpatch_text.substring(0, char_count2) + + postpatch_text.substring(char_count2 + + diff_text.length); + break; + case DIFF_EQUAL: + if (diff_text.length <= 2 * this.Patch_Margin && + patchDiffLength && diffs.length != x + 1) { + // Small equality inside a patch. + patch.diffs[patchDiffLength++] = diffs[x]; + patch.length1 += diff_text.length; + patch.length2 += diff_text.length; + } else if (diff_text.length >= 2 * this.Patch_Margin) { + // Time for a new patch. + if (patchDiffLength) { + this.patch_addContext_(patch, prepatch_text); + patches.push(patch); + patch = new diff_match_patch.patch_obj(); + patchDiffLength = 0; + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (diff_type !== DIFF_INSERT) { + char_count1 += diff_text.length; + } + if (diff_type !== DIFF_DELETE) { + char_count2 += diff_text.length; + } + } + // Pick up the leftover patch if not empty. + if (patchDiffLength) { + this.patch_addContext_(patch, prepatch_text); + patches.push(patch); + } + + return patches; +}; + + +/** + * Given an array of patches, return another array that is identical. + * @param {!Array.} patches Array of Patch objects. + * @return {!Array.} Array of Patch objects. + */ +diff_match_patch.prototype.patch_deepCopy = function(patches) { + // Making deep copies is hard in JavaScript. + var patchesCopy = []; + for (var x = 0; x < patches.length; x++) { + var patch = patches[x]; + var patchCopy = new diff_match_patch.patch_obj(); + patchCopy.diffs = []; + for (var y = 0; y < patch.diffs.length; y++) { + patchCopy.diffs[y] = patch.diffs[y].slice(); + } + patchCopy.start1 = patch.start1; + patchCopy.start2 = patch.start2; + patchCopy.length1 = patch.length1; + patchCopy.length2 = patch.length2; + patchesCopy[x] = patchCopy; + } + return patchesCopy; +}; + + +/** + * Merge a set of patches onto the text. Return a patched text, as well + * as a list of true/false values indicating which patches were applied. + * @param {!Array.} patches Array of Patch objects. + * @param {string} text Old text. + * @return {!Array.>} Two element Array, containing the + * new text and an array of boolean values. + */ +diff_match_patch.prototype.patch_apply = function(patches, text) { + if (patches.length == 0) { + return [text, []]; + } + + // Deep copy the patches so that no changes are made to originals. + patches = this.patch_deepCopy(patches); + + var nullPadding = this.patch_addPadding(patches); + text = nullPadding + text + nullPadding; + + this.patch_splitMax(patches); + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + var delta = 0; + var results = []; + for (var x = 0; x < patches.length; x++) { + var expected_loc = patches[x].start2 + delta; + var text1 = this.diff_text1(patches[x].diffs); + var start_loc; + var end_loc = -1; + if (text1.length > this.Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = this.match_main(text, text1.substring(0, this.Match_MaxBits), + expected_loc); + if (start_loc != -1) { + end_loc = this.match_main(text, + text1.substring(text1.length - this.Match_MaxBits), + expected_loc + text1.length - this.Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = this.match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= patches[x].length2 - patches[x].length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + var text2; + if (end_loc == -1) { + text2 = text.substring(start_loc, start_loc + text1.length); + } else { + text2 = text.substring(start_loc, end_loc + this.Match_MaxBits); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.substring(0, start_loc) + + this.diff_text2(patches[x].diffs) + + text.substring(start_loc + text1.length); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + var diffs = this.diff_main(text1, text2, false); + if (text1.length > this.Match_MaxBits && + this.diff_levenshtein(diffs) / text1.length > + this.Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. + results[x] = false; + } else { + this.diff_cleanupSemanticLossless(diffs); + var index1 = 0; + var index2; + for (var y = 0; y < patches[x].diffs.length; y++) { + var mod = patches[x].diffs[y]; + if (mod[0] !== DIFF_EQUAL) { + index2 = this.diff_xIndex(diffs, index1); + } + if (mod[0] === DIFF_INSERT) { // Insertion + text = text.substring(0, start_loc + index2) + mod[1] + + text.substring(start_loc + index2); + } else if (mod[0] === DIFF_DELETE) { // Deletion + text = text.substring(0, start_loc + index2) + + text.substring(start_loc + this.diff_xIndex(diffs, + index1 + mod[1].length)); + } + if (mod[0] !== DIFF_DELETE) { + index1 += mod[1].length; + } + } + } + } + } + } + // Strip the padding off. + text = text.substring(nullPadding.length, text.length - nullPadding.length); + return [text, results]; +}; + + +/** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param {!Array.} patches Array of Patch objects. + * @return {string} The padding string added to each side. + */ +diff_match_patch.prototype.patch_addPadding = function(patches) { + var paddingLength = this.Patch_Margin; + var nullPadding = ''; + for (var x = 1; x <= paddingLength; x++) { + nullPadding += String.fromCharCode(x); + } + + // Bump all the patches forward. + for (var x = 0; x < patches.length; x++) { + patches[x].start1 += paddingLength; + patches[x].start2 += paddingLength; + } + + // Add some padding on start of first diff. + var patch = patches[0]; + var diffs = patch.diffs; + if (diffs.length == 0 || diffs[0][0] != DIFF_EQUAL) { + // Add nullPadding equality. + diffs.unshift([DIFF_EQUAL, nullPadding]); + patch.start1 -= paddingLength; // Should be 0. + patch.start2 -= paddingLength; // Should be 0. + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs[0][1].length) { + // Grow first equality. + var extraLength = paddingLength - diffs[0][1].length; + diffs[0][1] = nullPadding.substring(diffs[0][1].length) + diffs[0][1]; + patch.start1 -= extraLength; + patch.start2 -= extraLength; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + // Add some padding on end of last diff. + patch = patches[patches.length - 1]; + diffs = patch.diffs; + if (diffs.length == 0 || diffs[diffs.length - 1][0] != DIFF_EQUAL) { + // Add nullPadding equality. + diffs.push([DIFF_EQUAL, nullPadding]); + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs[diffs.length - 1][1].length) { + // Grow last equality. + var extraLength = paddingLength - diffs[diffs.length - 1][1].length; + diffs[diffs.length - 1][1] += nullPadding.substring(0, extraLength); + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + return nullPadding; +}; + + +/** + * Look through the patches and break up any which are longer than the maximum + * limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param {!Array.} patches Array of Patch objects. + */ +diff_match_patch.prototype.patch_splitMax = function(patches) { + var patch_size = this.Match_MaxBits; + for (var x = 0; x < patches.length; x++) { + if (patches[x].length1 <= patch_size) { + continue; + } + var bigpatch = patches[x]; + // Remove the big old patch. + patches.splice(x--, 1); + var start1 = bigpatch.start1; + var start2 = bigpatch.start2; + var precontext = ''; + while (bigpatch.diffs.length !== 0) { + // Create one of several smaller patches. + var patch = new diff_match_patch.patch_obj(); + var empty = true; + patch.start1 = start1 - precontext.length; + patch.start2 = start2 - precontext.length; + if (precontext !== '') { + patch.length1 = patch.length2 = precontext.length; + patch.diffs.push([DIFF_EQUAL, precontext]); + } + while (bigpatch.diffs.length !== 0 && + patch.length1 < patch_size - this.Patch_Margin) { + var diff_type = bigpatch.diffs[0][0]; + var diff_text = bigpatch.diffs[0][1]; + if (diff_type === DIFF_INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length; + start2 += diff_text.length; + patch.diffs.push(bigpatch.diffs.shift()); + empty = false; + } else if (diff_type === DIFF_DELETE && patch.diffs.length == 1 && + patch.diffs[0][0] == DIFF_EQUAL && + diff_text.length > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length; + start1 += diff_text.length; + empty = false; + patch.diffs.push([diff_type, diff_text]); + bigpatch.diffs.shift(); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substring(0, + patch_size - patch.length1 - this.Patch_Margin); + patch.length1 += diff_text.length; + start1 += diff_text.length; + if (diff_type === DIFF_EQUAL) { + patch.length2 += diff_text.length; + start2 += diff_text.length; + } else { + empty = false; + } + patch.diffs.push([diff_type, diff_text]); + if (diff_text == bigpatch.diffs[0][1]) { + bigpatch.diffs.shift(); + } else { + bigpatch.diffs[0][1] = + bigpatch.diffs[0][1].substring(diff_text.length); + } + } + } + // Compute the head context for the next patch. + precontext = this.diff_text2(patch.diffs); + precontext = + precontext.substring(precontext.length - this.Patch_Margin); + // Append the end context for this patch. + var postcontext = this.diff_text1(bigpatch.diffs) + .substring(0, this.Patch_Margin); + if (postcontext !== '') { + patch.length1 += postcontext.length; + patch.length2 += postcontext.length; + if (patch.diffs.length !== 0 && + patch.diffs[patch.diffs.length - 1][0] === DIFF_EQUAL) { + patch.diffs[patch.diffs.length - 1][1] += postcontext; + } else { + patch.diffs.push([DIFF_EQUAL, postcontext]); + } + } + if (!empty) { + patches.splice(++x, 0, patch); + } + } + } +}; + + +/** + * Take a list of patches and return a textual representation. + * @param {!Array.} patches Array of Patch objects. + * @return {string} Text representation of patches. + */ +diff_match_patch.prototype.patch_toText = function(patches) { + var text = []; + for (var x = 0; x < patches.length; x++) { + text[x] = patches[x]; + } + return text.join(''); +}; + + +/** + * Parse a textual representation of patches and return a list of Patch objects. + * @param {string} textline Text representation of patches. + * @return {!Array.} Array of Patch objects. + * @throws {!Error} If invalid input. + */ +diff_match_patch.prototype.patch_fromText = function(textline) { + var patches = []; + if (!textline) { + return patches; + } + var text = textline.split('\n'); + var textPointer = 0; + var patchHeader = /^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/; + while (textPointer < text.length) { + var m = text[textPointer].match(patchHeader); + if (!m) { + throw new Error('Invalid patch string: ' + text[textPointer]); + } + var patch = new diff_match_patch.patch_obj(); + patches.push(patch); + patch.start1 = parseInt(m[1], 10); + if (m[2] === '') { + patch.start1--; + patch.length1 = 1; + } else if (m[2] == '0') { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = parseInt(m[2], 10); + } + + patch.start2 = parseInt(m[3], 10); + if (m[4] === '') { + patch.start2--; + patch.length2 = 1; + } else if (m[4] == '0') { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = parseInt(m[4], 10); + } + textPointer++; + + while (textPointer < text.length) { + var sign = text[textPointer].charAt(0); + try { + var line = decodeURI(text[textPointer].substring(1)); + } catch (ex) { + // Malformed URI sequence. + throw new Error('Illegal escape in patch_fromText: ' + line); + } + if (sign == '-') { + // Deletion. + patch.diffs.push([DIFF_DELETE, line]); + } else if (sign == '+') { + // Insertion. + patch.diffs.push([DIFF_INSERT, line]); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.push([DIFF_EQUAL, line]); + } else if (sign == '@') { + // Start of next patch. + break; + } else if (sign === '') { + // Blank line? Whatever. + } else { + // WTF? + throw new Error('Invalid patch mode "' + sign + '" in: ' + line); + } + textPointer++; + } + } + return patches; +}; + + +/** + * Class representing one patch operation. + * @constructor + */ +diff_match_patch.patch_obj = function() { + /** @type {!Array.} */ + this.diffs = []; + /** @type {?number} */ + this.start1 = null; + /** @type {?number} */ + this.start2 = null; + /** @type {number} */ + this.length1 = 0; + /** @type {number} */ + this.length2 = 0; +}; + + +/** + * Emmulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indicies are printed as 1-based, not 0-based. + * @return {string} The GNU diff string. + */ +diff_match_patch.patch_obj.prototype.toString = function() { + var coords1, coords2; + if (this.length1 === 0) { + coords1 = this.start1 + ',0'; + } else if (this.length1 == 1) { + coords1 = this.start1 + 1; + } else { + coords1 = (this.start1 + 1) + ',' + this.length1; + } + if (this.length2 === 0) { + coords2 = this.start2 + ',0'; + } else if (this.length2 == 1) { + coords2 = this.start2 + 1; + } else { + coords2 = (this.start2 + 1) + ',' + this.length2; + } + var text = ['@@ -' + coords1 + ' +' + coords2 + ' @@\n']; + var op; + // Escape the body of the patch with %xx notation. + for (var x = 0; x < this.diffs.length; x++) { + switch (this.diffs[x][0]) { + case DIFF_INSERT: + op = '+'; + break; + case DIFF_DELETE: + op = '-'; + break; + case DIFF_EQUAL: + op = ' '; + break; + } + text[x + 1] = op + encodeURI(this.diffs[x][1]) + '\n'; + } + return text.join('').replace(/%20/g, ' '); +}; + + +// Export these global variables so that they survive Google's JS compiler. +// In a browser, 'this' will be 'window'. +// Users of node.js should 'require' the uncompressed version since Google's +// JS compiler may break the following exports for non-browser environments. +this['diff_match_patch'] = diff_match_patch; +this['DIFF_DELETE'] = DIFF_DELETE; +this['DIFF_INSERT'] = DIFF_INSERT; +this['DIFF_EQUAL'] = DIFF_EQUAL; diff --git a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee index eab7422291..d2e449b646 100644 --- a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee +++ b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee @@ -345,6 +345,47 @@ describe "UpdateCompressor", -> meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id v: 43 }] + + describe "delete - insert", -> + it "should do a diff of the content", -> + expect(@UpdateCompressor.compressUpdates [{ + op: { p: 3, d: "one two three four five six seven eight" } + meta: ts: @ts1, user_id: @user_id + v: 42 + }, { + op: { p: 3, i: "one 2 three four five six seven eight" } + meta: ts: @ts2, user_id: @user_id + v: 43 + }]) + .to.deep.equal [{ + op: { p: 7, d: "two" } + meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id + v: 43 + }, { + op: { p: 7, i: "2" } + meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id + v: 43 + }] + + it "should do a diff of the content", -> + expect(@UpdateCompressor.compressUpdates [{ + op: { p: 3, d: "one two three four five six seven eight" } + meta: ts: @ts1, user_id: @user_id + v: 42 + }, { + op: { p: 3, i: "one 2 three four five six seven eight" } + meta: ts: @ts2, user_id: @user_id + v: 43 + }]) + .to.deep.equal [{ + op: { p: 7, d: "two" } + meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id + v: 43 + }, { + op: { p: 7, i: "2" } + meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id + v: 43 + }] describe "noop - insert", -> it "should leave them untouched", -> From dfe26262ec9197a722aaf1359efd8034e91c030f Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 16 Sep 2016 11:50:44 +0100 Subject: [PATCH 02/16] Return a No-op if diff returns nothing --- .../app/coffee/UpdateCompressor.coffee | 22 ++++++++++++++----- .../UpdateCompressorTests.coffee | 15 +++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/services/track-changes/app/coffee/UpdateCompressor.coffee b/services/track-changes/app/coffee/UpdateCompressor.coffee index f55d7462ea..3ae5adf8fb 100644 --- a/services/track-changes/app/coffee/UpdateCompressor.coffee +++ b/services/track-changes/app/coffee/UpdateCompressor.coffee @@ -162,16 +162,28 @@ module.exports = UpdateCompressor = else if firstOp.d? and secondOp.i? and firstOp.p == secondOp.p offset = firstOp.p diff_ops = @diffAsShareJsOps(firstOp.d, secondOp.i) - return diff_ops.map (op) -> - op.p += offset - return { + if diff_ops.length == 0 + return [{ # Noop meta: start_ts: firstUpdate.meta.start_ts end_ts: secondUpdate.meta.end_ts user_id: firstUpdate.meta.user_id - op: op + op: + p: firstOp.p + i: "" v: secondUpdate.v - } + }] + else + return diff_ops.map (op) -> + op.p += offset + return { + meta: + start_ts: firstUpdate.meta.start_ts + end_ts: secondUpdate.meta.end_ts + user_id: firstUpdate.meta.user_id + op: op + v: secondUpdate.v + } else return [firstUpdate, secondUpdate] diff --git a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee index d2e449b646..b4858b79b0 100644 --- a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee +++ b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee @@ -7,7 +7,8 @@ SandboxedModule = require('sandboxed-module') describe "UpdateCompressor", -> beforeEach -> - @UpdateCompressor = SandboxedModule.require modulePath + @UpdateCompressor = SandboxedModule.require modulePath, requires: + "../lib/diff_match_patch": require("../../../../app/lib/diff_match_patch") @user_id = "user-id-1" @other_user_id = "user-id-2" @bigstring = ("a" for [0 .. 2*1024*1024]).join("") @@ -366,23 +367,19 @@ describe "UpdateCompressor", -> meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id v: 43 }] - - it "should do a diff of the content", -> + + it "should return a no-op if the delete and insert are the same", -> expect(@UpdateCompressor.compressUpdates [{ op: { p: 3, d: "one two three four five six seven eight" } meta: ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 3, i: "one 2 three four five six seven eight" } + op: { p: 3, i: "one two three four five six seven eight" } meta: ts: @ts2, user_id: @user_id v: 43 }]) .to.deep.equal [{ - op: { p: 7, d: "two" } - meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id - v: 43 - }, { - op: { p: 7, i: "2" } + op: { p: 3, i: "" } meta: start_ts: @ts1, end_ts: @ts2, user_id: @user_id v: 43 }] From 8dda44f880124a5dd9ae87aebe5ce0dca09e8f75 Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 16 Sep 2016 11:51:41 +0100 Subject: [PATCH 03/16] Speed up unit tests by only generating big fixtures once up front --- .../UpdateCompressorTests.coffee | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee index b4858b79b0..05fe923ab3 100644 --- a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee +++ b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee @@ -5,14 +5,15 @@ expect = chai.expect modulePath = "../../../../app/js/UpdateCompressor.js" SandboxedModule = require('sandboxed-module') +bigstring = ("a" for [0 .. 2*1024*1024]).join("") +mediumstring = ("a" for [0 .. 1024*1024]).join("") + describe "UpdateCompressor", -> beforeEach -> @UpdateCompressor = SandboxedModule.require modulePath, requires: "../lib/diff_match_patch": require("../../../../app/lib/diff_match_patch") @user_id = "user-id-1" @other_user_id = "user-id-2" - @bigstring = ("a" for [0 .. 2*1024*1024]).join("") - @mediumstring = ("a" for [0 .. 1024*1024]).join("") @ts1 = Date.now() @ts2 = Date.now() + 1000 @@ -150,7 +151,7 @@ describe "UpdateCompressor", -> meta: ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 6, i: @bigstring } + op: { p: 6, i: bigstring } meta: ts: @ts2, user_id: @user_id v: 43 }]) @@ -159,47 +160,47 @@ describe "UpdateCompressor", -> meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 6, i: @bigstring } + op: { p: 6, i: bigstring } meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id v: 43 }] it "should not append inserts that are too big (first op)", -> expect(@UpdateCompressor.compressUpdates [{ - op: { p: 3, i: @bigstring } + op: { p: 3, i: bigstring } meta: ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 3 + @bigstring.length, i: "bar" } + op: { p: 3 + bigstring.length, i: "bar" } meta: ts: @ts2, user_id: @user_id v: 43 }]) .to.deep.equal [{ - op: { p: 3, i: @bigstring } + op: { p: 3, i: bigstring } meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 3 + @bigstring.length, i: "bar" } + op: { p: 3 + bigstring.length, i: "bar" } meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id v: 43 }] it "should not append inserts that are too big (first and second op)", -> expect(@UpdateCompressor.compressUpdates [{ - op: { p: 3, i: @mediumstring } + op: { p: 3, i: mediumstring } meta: ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 3 + @mediumstring.length, i: @mediumstring } + op: { p: 3 + mediumstring.length, i: mediumstring } meta: ts: @ts2, user_id: @user_id v: 43 }]) .to.deep.equal [{ - op: { p: 3, i: @mediumstring } + op: { p: 3, i: mediumstring } meta: start_ts: @ts1, end_ts: @ts1, user_id: @user_id v: 42 }, { - op: { p: 3 + @mediumstring.length, i: @mediumstring } + op: { p: 3 + mediumstring.length, i: mediumstring } meta: start_ts: @ts2, end_ts: @ts2, user_id: @user_id v: 43 }] From 05a048db9ab6bfb2beed184e8708cd2b4eff1d0e Mon Sep 17 00:00:00 2001 From: James Allen Date: Thu, 22 Sep 2016 11:13:26 +0100 Subject: [PATCH 04/16] Add in a little more logging --- services/track-changes/app/coffee/DiffGenerator.coffee | 2 +- services/track-changes/app/coffee/DiffManager.coffee | 2 ++ services/track-changes/app/coffee/DocumentUpdaterManager.coffee | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/DiffGenerator.coffee b/services/track-changes/app/coffee/DiffGenerator.coffee index b4e0156f02..c9c8e1d4bd 100644 --- a/services/track-changes/app/coffee/DiffGenerator.coffee +++ b/services/track-changes/app/coffee/DiffGenerator.coffee @@ -18,7 +18,7 @@ module.exports = DiffGenerator = if e instanceof ConsistencyError and i = update.op.length - 1 # catch known case where the last op in an array has been # merged into a later op - logger.error {update, op: JSON.stringify(op)}, "marking op as broken" + logger.error {err: e, update, op: JSON.stringify(op)}, "marking op as broken" op.broken = true else throw e # rethrow the execption diff --git a/services/track-changes/app/coffee/DiffManager.coffee b/services/track-changes/app/coffee/DiffManager.coffee index dfdb1a80a2..71692b9d83 100644 --- a/services/track-changes/app/coffee/DiffManager.coffee +++ b/services/track-changes/app/coffee/DiffManager.coffee @@ -49,6 +49,8 @@ module.exports = DiffManager = lastUpdate = updates[0] if lastUpdate? and lastUpdate.v != version - 1 return callback new Error("latest update version, #{lastUpdate.v}, does not match doc version, #{version}") + + logger.log {docVersion: version, lastUpdateVersion: lastUpdate?.v, updateCount: updates.length}, "rewinding updates" tryUpdates = updates.slice().reverse() diff --git a/services/track-changes/app/coffee/DocumentUpdaterManager.coffee b/services/track-changes/app/coffee/DocumentUpdaterManager.coffee index edc2f11c68..625622206e 100644 --- a/services/track-changes/app/coffee/DocumentUpdaterManager.coffee +++ b/services/track-changes/app/coffee/DocumentUpdaterManager.coffee @@ -14,6 +14,7 @@ module.exports = DocumentUpdaterManager = body = JSON.parse(body) catch error return callback(error) + logger.log {project_id, doc_id, version: body.version}, "got doc from document updater" callback null, body.lines.join("\n"), body.version else error = new Error("doc updater returned a non-success status code: #{res.statusCode}") From cce93b95a544d45fe6f98f1eb2233fe6c8b7a701 Mon Sep 17 00:00:00 2001 From: James Allen Date: Thu, 22 Sep 2016 11:18:10 +0100 Subject: [PATCH 05/16] Fetch updates before doc content when building a diff --- services/track-changes/app/coffee/DiffManager.coffee | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/track-changes/app/coffee/DiffManager.coffee b/services/track-changes/app/coffee/DiffManager.coffee index 71692b9d83..94e2f0adf6 100644 --- a/services/track-changes/app/coffee/DiffManager.coffee +++ b/services/track-changes/app/coffee/DiffManager.coffee @@ -5,11 +5,12 @@ logger = require "logger-sharelatex" module.exports = DiffManager = getLatestDocAndUpdates: (project_id, doc_id, fromVersion, toVersion, callback = (error, content, version, updates) ->) -> - # retrieve the document before retreiving the updates, - # because updates are written to mongo after the document - DocumentUpdaterManager.getDocument project_id, doc_id, (error, content, version) -> + # Whichever order these are in, there is potential for updates to come in between + # them so that they do not return the same 'latest' versions. + # TODO: If these don't return consistent content, try again. + UpdatesManager.getDocUpdatesWithUserInfo project_id, doc_id, from: fromVersion, to: toVersion, (error, updates) -> return callback(error) if error? - UpdatesManager.getDocUpdatesWithUserInfo project_id, doc_id, from: fromVersion, to: toVersion, (error, updates) -> + DocumentUpdaterManager.getDocument project_id, doc_id, (error, content, version) -> return callback(error) if error? callback(null, content, version, updates) From bddd1fda7d4fe7338c08a399912f8eeae7b07354 Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 30 Sep 2016 11:36:47 +0100 Subject: [PATCH 06/16] Retry rewind if doc and update versions don't match --- .../app/coffee/DiffManager.coffee | 36 +++++++-- .../DiffManager/DiffManagerTests.coffee | 78 ++++++++++++++++++- 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/services/track-changes/app/coffee/DiffManager.coffee b/services/track-changes/app/coffee/DiffManager.coffee index 94e2f0adf6..382b366b49 100644 --- a/services/track-changes/app/coffee/DiffManager.coffee +++ b/services/track-changes/app/coffee/DiffManager.coffee @@ -5,12 +5,11 @@ logger = require "logger-sharelatex" module.exports = DiffManager = getLatestDocAndUpdates: (project_id, doc_id, fromVersion, toVersion, callback = (error, content, version, updates) ->) -> - # Whichever order these are in, there is potential for updates to come in between - # them so that they do not return the same 'latest' versions. - # TODO: If these don't return consistent content, try again. - UpdatesManager.getDocUpdatesWithUserInfo project_id, doc_id, from: fromVersion, to: toVersion, (error, updates) -> + # Get updates last, since then they must be ahead and it + # might be possible to rewind to the same version as the doc. + DocumentUpdaterManager.getDocument project_id, doc_id, (error, content, version) -> return callback(error) if error? - DocumentUpdaterManager.getDocument project_id, doc_id, (error, content, version) -> + UpdatesManager.getDocUpdatesWithUserInfo project_id, doc_id, from: fromVersion, to: toVersion, (error, updates) -> return callback(error) if error? callback(null, content, version, updates) @@ -34,7 +33,28 @@ module.exports = DiffManager = callback(null, diff) - getDocumentBeforeVersion: (project_id, doc_id, version, callback = (error, document, rewoundUpdates) ->) -> + getDocumentBeforeVersion: (project_id, doc_id, version, _callback = (error, document, rewoundUpdates) ->) -> + # Whichever order we get the latest document and the latest updates, + # there is potential for updates to be applied between them so that + # they do not return the same 'latest' versions. + # If this happens, we just retry and hopefully get them at the compatible + # versions. + retries = 3 + callback = (error, args...) -> + if error? + if error.retry and retries > 0 + logger.warn {error, project_id, doc_id, version}, "retrying getDocumentBeforeVersion" + retry() + else + _callback(error) + else + _callback(null, args...) + + do retry = () -> + retries-- + DiffManager._tryGetDocumentBeforeVersion(project_id, doc_id, version, callback) + + _tryGetDocumentBeforeVersion: (project_id, doc_id, version, callback = (error, document, rewoundUpdates) ->) -> logger.log project_id: project_id, doc_id: doc_id, version: version, "getting document before version" DiffManager.getLatestDocAndUpdates project_id, doc_id, version, null, (error, content, version, updates) -> return callback(error) if error? @@ -49,7 +69,9 @@ module.exports = DiffManager = lastUpdate = updates[0] if lastUpdate? and lastUpdate.v != version - 1 - return callback new Error("latest update version, #{lastUpdate.v}, does not match doc version, #{version}") + error = new Error("latest update version, #{lastUpdate.v}, does not match doc version, #{version}") + error.retry = true + callback error logger.log {docVersion: version, lastUpdateVersion: lastUpdate?.v, updateCount: updates.length}, "rewinding updates" diff --git a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee index 388520293e..5e906959d5 100644 --- a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee @@ -90,6 +90,76 @@ describe "DiffManager", -> .should.equal true describe "getDocumentBeforeVersion", -> + beforeEach -> + @DiffManager._tryGetDocumentBeforeVersion = sinon.stub() + @document = "mock-documents" + @rewound_updates = "mock-rewound-updates" + + describe "succesfully", -> + beforeEach -> + @DiffManager._tryGetDocumentBeforeVersion.yields(null, @document, @rewound_updates) + @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @version, @callback + + it "should call _tryGetDocumentBeforeVersion", -> + @DiffManager._tryGetDocumentBeforeVersion + .calledWith(@project_id, @doc_id, @version) + .should.equal true + + it "should call the callback with the response", -> + @callback.calledWith(null, @document, @rewound_updates).should.equal true + + describe "with a retry needed", -> + beforeEach -> + retried = false + @DiffManager._tryGetDocumentBeforeVersion = (project_id, doc_id, version, callback) => + if !retried + retried = true + error = new Error() + error.retry = true + callback error + else + callback(null, @document, @rewound_updates) + sinon.spy @DiffManager, "_tryGetDocumentBeforeVersion" + @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @version, @callback + + it "should call _tryGetDocumentBeforeVersion twice", -> + @DiffManager._tryGetDocumentBeforeVersion + .calledTwice + .should.equal true + + it "should call the callback with the response", -> + @callback.calledWith(null, @document, @rewound_updates).should.equal true + + describe "with a non-retriable error", -> + beforeEach -> + @error = new Error("oops") + @DiffManager._tryGetDocumentBeforeVersion.yields(@error) + @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @version, @callback + + it "should call _tryGetDocumentBeforeVersion once", -> + @DiffManager._tryGetDocumentBeforeVersion + .calledOnce + .should.equal true + + it "should call the callback with the error", -> + @callback.calledWith(@error).should.equal true + + describe "when retry limit is matched", -> + beforeEach -> + @error = new Error("oops") + @error.retry = true + @DiffManager._tryGetDocumentBeforeVersion.yields(@error) + @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @version, @callback + + it "should call _tryGetDocumentBeforeVersion three times (max retries)", -> + @DiffManager._tryGetDocumentBeforeVersion + .calledThrice + .should.equal true + + it "should call the callback with the error", -> + @callback.calledWith(@error).should.equal true + + describe "_tryGetDocumentBeforeVersion", -> beforeEach -> @content = "hello world" # Op versions are the version they were applied to, so doc is always one version @@ -113,7 +183,7 @@ describe "DiffManager", -> updates.reverse() return @rewound_content @rewindUpdatesWithArgs = @DiffGenerator.rewindUpdates.withArgs(@content, @updates.slice().reverse()) - @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @fromVersion, @callback + @DiffManager._tryGetDocumentBeforeVersion @project_id, @doc_id, @fromVersion, @callback it "should get the latest doc and version with all recent updates", -> @DiffManager.getLatestDocAndUpdates @@ -131,12 +201,14 @@ describe "DiffManager", -> @version = 50 @updates = [ { op: "mock-1", v: 40 }, { op: "mock-1", v: 39 } ] @DiffManager.getLatestDocAndUpdates = sinon.stub().callsArgWith(4, null, @content, @version, @updates) - @DiffManager.getDocumentBeforeVersion @project_id, @doc_id, @fromVersion, @callback + @DiffManager._tryGetDocumentBeforeVersion @project_id, @doc_id, @fromVersion, @callback - it "should call the callback with an error", -> + it "should call the callback with an error with retry = true set", -> @callback .calledWith(new Error("latest update version, 40, does not match doc version, 42")) .should.equal true + error = @callback.args[0][0] + expect(error.retry).to.equal true describe "when the updates are inconsistent", -> beforeEach -> From deced1aa15a87f23a229491651039ebaf31b50af Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 30 Sep 2016 11:41:49 +0100 Subject: [PATCH 07/16] Fix unit tests with logger.warn stub --- .../test/unit/coffee/DiffManager/DiffManagerTests.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee index 5e906959d5..d19f3938cf 100644 --- a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee @@ -8,7 +8,7 @@ SandboxedModule = require('sandboxed-module') describe "DiffManager", -> beforeEach -> @DiffManager = SandboxedModule.require modulePath, requires: - "logger-sharelatex": @logger = { log: sinon.stub(), error: sinon.stub() } + "logger-sharelatex": @logger = { log: sinon.stub(), error: sinon.stub(), warn: sinon.stub() } "./UpdatesManager": @UpdatesManager = {} "./DocumentUpdaterManager": @DocumentUpdaterManager = {} "./DiffGenerator": @DiffGenerator = {} From a7f44bcd01afcb1ece16da9bd5f72d0f79c30e6c Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 30 Sep 2016 13:36:31 +0100 Subject: [PATCH 08/16] Add missing return on callback --- services/track-changes/app/coffee/DiffManager.coffee | 2 +- .../test/unit/coffee/DiffManager/DiffManagerTests.coffee | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/services/track-changes/app/coffee/DiffManager.coffee b/services/track-changes/app/coffee/DiffManager.coffee index 382b366b49..f9adc56ea9 100644 --- a/services/track-changes/app/coffee/DiffManager.coffee +++ b/services/track-changes/app/coffee/DiffManager.coffee @@ -71,7 +71,7 @@ module.exports = DiffManager = if lastUpdate? and lastUpdate.v != version - 1 error = new Error("latest update version, #{lastUpdate.v}, does not match doc version, #{version}") error.retry = true - callback error + return callback error logger.log {docVersion: version, lastUpdateVersion: lastUpdate?.v, updateCount: updates.length}, "rewinding updates" diff --git a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee index d19f3938cf..723193403c 100644 --- a/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/DiffManager/DiffManagerTests.coffee @@ -204,9 +204,7 @@ describe "DiffManager", -> @DiffManager._tryGetDocumentBeforeVersion @project_id, @doc_id, @fromVersion, @callback it "should call the callback with an error with retry = true set", -> - @callback - .calledWith(new Error("latest update version, 40, does not match doc version, 42")) - .should.equal true + @callback.calledOnce.should.equal true error = @callback.args[0][0] expect(error.retry).to.equal true From 3c8aeb1262349257343fffd7caf7e013903565da Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 30 Sep 2016 13:38:47 +0100 Subject: [PATCH 09/16] Log number of retries --- services/track-changes/app/coffee/DiffManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/DiffManager.coffee b/services/track-changes/app/coffee/DiffManager.coffee index f9adc56ea9..c01c73a533 100644 --- a/services/track-changes/app/coffee/DiffManager.coffee +++ b/services/track-changes/app/coffee/DiffManager.coffee @@ -43,7 +43,7 @@ module.exports = DiffManager = callback = (error, args...) -> if error? if error.retry and retries > 0 - logger.warn {error, project_id, doc_id, version}, "retrying getDocumentBeforeVersion" + logger.warn {error, project_id, doc_id, version, retries}, "retrying getDocumentBeforeVersion" retry() else _callback(error) From f8865e616d86864ee8d46644fc48f20fff6c069c Mon Sep 17 00:00:00 2001 From: James Allen Date: Fri, 30 Sep 2016 14:34:13 +0100 Subject: [PATCH 10/16] Update ensureIndices to reflect reality --- services/track-changes/app/coffee/MongoManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/MongoManager.coffee b/services/track-changes/app/coffee/MongoManager.coffee index 4376e3adb1..8390b25ef7 100644 --- a/services/track-changes/app/coffee/MongoManager.coffee +++ b/services/track-changes/app/coffee/MongoManager.coffee @@ -81,7 +81,7 @@ module.exports = MongoManager = # For finding all updates that go into a diff for a doc db.docHistory.ensureIndex { doc_id: 1, v: 1 }, { background: true } # For finding all updates that affect a project - db.docHistory.ensureIndex { project_id: 1, "meta.end_ts": 1, "meta.start_ts": -1 }, { background: true } + db.docHistory.ensureIndex { project_id: 1, "meta.end_ts": 1 }, { background: true } # For finding updates that don't yet have a project_id and need it inserting db.docHistory.ensureIndex { doc_id: 1, project_id: 1 }, { background: true } # For finding project meta-data From 40ed6fee468d332aa8c8458fc733e30f9625d2c6 Mon Sep 17 00:00:00 2001 From: James Allen Date: Tue, 4 Oct 2016 15:13:04 +0100 Subject: [PATCH 11/16] Split update summary on big deletes --- .../app/coffee/UpdatesManager.coffee | 24 +++++++++++- .../UpdatesManager/UpdatesManagerTests.coffee | 38 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/UpdatesManager.coffee b/services/track-changes/app/coffee/UpdatesManager.coffee index 8506f6cdaf..bd53e81a0a 100644 --- a/services/track-changes/app/coffee/UpdatesManager.coffee +++ b/services/track-changes/app/coffee/UpdatesManager.coffee @@ -219,11 +219,33 @@ module.exports = UpdatesManager = return !!user_id.match(/^[a-f0-9]{24}$/) TIME_BETWEEN_DISTINCT_UPDATES: fiveMinutes = 5 * 60 * 1000 + SPLIT_ON_DELETE_SIZE: 16 # characters _summarizeUpdates: (updates, existingSummarizedUpdates = []) -> summarizedUpdates = existingSummarizedUpdates.slice() + previousUpdateWasBigDelete = false for update in updates earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1] - if earliestUpdate and earliestUpdate.meta.start_ts - update.meta.end_ts < @TIME_BETWEEN_DISTINCT_UPDATES + shouldConcat = false + + # If a user inserts some text, then deletes a big chunk including that text, + # the update we show might concat the insert and delete, and there will be no sign + # of that insert having happened, or be able to restore to it (restoring after a big delete is common). + # So, we split the summary on 'big' deletes. However, we've stepping backwards in time with + # most recent changes considered first, so if this update is a big delete, we want to start + # a new summarized update next timge, hence we monitor the previous update. + if previousUpdateWasBigDelete + shouldConcat = false + else if earliestUpdate and earliestUpdate.meta.start_ts - update.meta.end_ts < @TIME_BETWEEN_DISTINCT_UPDATES + shouldConcat = true + + isBigDelete = false + for op in update.op or [] + if op.d? and op.d.length > @SPLIT_ON_DELETE_SIZE + isBigDelete = true + + previousUpdateWasBigDelete = isBigDelete + + if shouldConcat # check if the user in this update is already present in the earliest update, # if not, add them to the users list of the earliest update earliestUpdate.meta.user_ids = _.union earliestUpdate.meta.user_ids, [update.meta.user_id] diff --git a/services/track-changes/test/unit/coffee/UpdatesManager/UpdatesManagerTests.coffee b/services/track-changes/test/unit/coffee/UpdatesManager/UpdatesManagerTests.coffee index cff44f07eb..161a5deb55 100644 --- a/services/track-changes/test/unit/coffee/UpdatesManager/UpdatesManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/UpdatesManager/UpdatesManagerTests.coffee @@ -761,3 +761,41 @@ describe "UpdatesManager", -> start_ts: @now end_ts: @now + 30 }] + + it "should split updates before a big delete", -> + result = @UpdatesManager._summarizeUpdates [{ + doc_id: "doc-id-1" + op: [{ d: "this is a long long long long long delete", p: 34 }] + meta: + user_id: @user_1.id + start_ts: @now + 20 + end_ts: @now + 30 + v: 5 + }, { + doc_id: "doc-id-1" + meta: + user_id: @user_2.id + start_ts: @now + end_ts: @now + 10 + v: 4 + }] + + expect(result).to.deep.equal [{ + docs: + "doc-id-1": + fromV: 5 + toV: 5 + meta: + user_ids: [@user_1.id] + start_ts: @now + 20 + end_ts: @now + 30 + }, { + docs: + "doc-id-1": + fromV: 4 + toV: 4 + meta: + user_ids: [@user_2.id] + start_ts: @now + end_ts: @now + 10 + }] \ No newline at end of file From 694be95e66e7c40db124de15c213966416e1e3a9 Mon Sep 17 00:00:00 2001 From: James Allen Date: Tue, 11 Oct 2016 11:01:50 +0100 Subject: [PATCH 12/16] Swap start_ts and end_ts comparison to correctly break chunks after 5 minutes --- services/track-changes/app/coffee/UpdatesManager.coffee | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/UpdatesManager.coffee b/services/track-changes/app/coffee/UpdatesManager.coffee index bd53e81a0a..ad6a583979 100644 --- a/services/track-changes/app/coffee/UpdatesManager.coffee +++ b/services/track-changes/app/coffee/UpdatesManager.coffee @@ -235,7 +235,9 @@ module.exports = UpdatesManager = # a new summarized update next timge, hence we monitor the previous update. if previousUpdateWasBigDelete shouldConcat = false - else if earliestUpdate and earliestUpdate.meta.start_ts - update.meta.end_ts < @TIME_BETWEEN_DISTINCT_UPDATES + else if earliestUpdate and earliestUpdate.meta.end_ts - update.meta.start_ts < @TIME_BETWEEN_DISTINCT_UPDATES + # We're going backwards in time through the updates, so only combine if this update starts less than 5 minutes before + # the end of current summarized block, so no block spans more than 5 minutes. shouldConcat = true isBigDelete = false From dadc548f81150df5f995850cbd908d87d6f40c57 Mon Sep 17 00:00:00 2001 From: James Allen Date: Thu, 12 Jan 2017 10:04:50 +0100 Subject: [PATCH 13/16] Ignore comment updates --- .../app/coffee/DiffGenerator.coffee | 3 +++ .../app/coffee/UpdateCompressor.coffee | 6 +++-- .../coffee/AppendingUpdatesTests.coffee | 22 +++++++++++++++++++ .../UpdateCompressorTests.coffee | 16 ++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/services/track-changes/app/coffee/DiffGenerator.coffee b/services/track-changes/app/coffee/DiffGenerator.coffee index c9c8e1d4bd..8738621f67 100644 --- a/services/track-changes/app/coffee/DiffGenerator.coffee +++ b/services/track-changes/app/coffee/DiffGenerator.coffee @@ -47,6 +47,9 @@ module.exports = DiffGenerator = else if op.d? return content.slice(0, op.p) + op.d + content.slice(op.p) + + else + return content rewindUpdates: (content, updates) -> for update in updates.reverse() diff --git a/services/track-changes/app/coffee/UpdateCompressor.coffee b/services/track-changes/app/coffee/UpdateCompressor.coffee index 3ae5adf8fb..9e11b281e6 100644 --- a/services/track-changes/app/coffee/UpdateCompressor.coffee +++ b/services/track-changes/app/coffee/UpdateCompressor.coffee @@ -24,7 +24,9 @@ module.exports = UpdateCompressor = convertToSingleOpUpdates: (updates) -> splitUpdates = [] for update in updates - if update.op.length == 0 + # Reject any non-insert or delete ops, i.e. comments + ops = update.op.filter (o) -> o.i? or o.d? + if ops.length == 0 splitUpdates.push op: UpdateCompressor.NOOP meta: @@ -33,7 +35,7 @@ module.exports = UpdateCompressor = user_id: update.meta.user_id v: update.v else - for op in update.op + for op in ops splitUpdates.push op: op meta: diff --git a/services/track-changes/test/acceptance/coffee/AppendingUpdatesTests.coffee b/services/track-changes/test/acceptance/coffee/AppendingUpdatesTests.coffee index 3d4bf14465..69d378fd9d 100644 --- a/services/track-changes/test/acceptance/coffee/AppendingUpdatesTests.coffee +++ b/services/track-changes/test/acceptance/coffee/AppendingUpdatesTests.coffee @@ -233,6 +233,28 @@ describe "Appending doc ops to the history", -> expect(@updates[0].pack[0].v).to.equal 3 expect(@updates[0].pack[1].v).to.equal 4 + describe "when there is a comment update", -> + before (done) -> + @project_id = ObjectId().toString() + @doc_id = ObjectId().toString() + @user_id = ObjectId().toString() + MockWebApi.projects[@project_id] = features: versioning: false + TrackChangesClient.pushRawUpdates @project_id, @doc_id, [{ + op: [{ c: "foo", p: 3 }, {d: "bar", p: 6}] + meta: { ts: Date.now(), user_id: @user_id } + v: 3 + }], (error) => + throw error if error? + TrackChangesClient.flushAndGetCompressedUpdates @project_id, @doc_id, (error, @updates) => + throw error if error? + done() + + it "should ignore the comment op", -> + expect(@updates[0].pack[0].op).to.deep.equal [{d: "bar", p: 6}] + + it "should insert the correct version numbers into mongo", -> + expect(@updates[0].pack[0].v).to.equal 3 + describe "when the project has versioning enabled", -> before (done) -> @project_id = ObjectId().toString() diff --git a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee index 05fe923ab3..b4488c17bc 100644 --- a/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee +++ b/services/track-changes/test/unit/coffee/UpdateCompressor/UpdateCompressorTests.coffee @@ -54,6 +54,22 @@ describe "UpdateCompressor", -> v: 42 }] + it "should ignore comment ops", -> + expect(@UpdateCompressor.convertToSingleOpUpdates [{ + op: [ @op1 = { p: 0, i: "Foo" }, @op2 = { p: 9, c: "baz"}, @op3 = { p: 6, i: "bar"} ] + meta: { ts: @ts1, user_id: @user_id } + v: 42 + }]) + .to.deep.equal [{ + op: @op1, + meta: { start_ts: @ts1, end_ts: @ts1, user_id: @user_id }, + v: 42 + }, { + op: @op3, + meta: { start_ts: @ts1, end_ts: @ts1, user_id: @user_id }, + v: 42 + }] + describe "concatUpdatesWithSameVersion", -> it "should concat updates with the same version", -> expect(@UpdateCompressor.concatUpdatesWithSameVersion [{ From 1d356f93ec4f1582b1a7537fbff83bd35587f335 Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Thu, 26 Jan 2017 13:07:54 +0000 Subject: [PATCH 14/16] added random up to 30 mins delay to the ttl of mongo objects --- .../app/coffee/PackManager.coffee | 8 ++++- .../PackManager/PackManagerTests.coffee | 31 ++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/services/track-changes/app/coffee/PackManager.coffee b/services/track-changes/app/coffee/PackManager.coffee index f9707931cf..15e7e8902a 100644 --- a/services/track-changes/app/coffee/PackManager.coffee +++ b/services/track-changes/app/coffee/PackManager.coffee @@ -526,7 +526,13 @@ module.exports = PackManager = setTTLOnArchivedPack: (project_id, doc_id, pack_id, callback) -> db.docHistory.findAndModify { query: {_id: pack_id} - update: {$set: {expiresAt: new Date(Date.now() + 1*DAYS)}} + update: {$set: {expiresAt: PackManager._getOneDayInFutureWithRandomDelay()}} }, (err) -> logger.log {project_id, doc_id, pack_id}, "set expiry on pack" callback() + + + _getOneDayInFutureWithRandomDelay: -> + thirtyMins = 1000 * 60 * 30 + randomThirtyMinMax = Math.round(Math.random() * thirtyMins) + return new Date(Date.now() + randomThirtyMinMax + 1*DAYS) diff --git a/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee b/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee index bd2244517f..c792381c6a 100644 --- a/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee @@ -25,7 +25,6 @@ describe "PackManager", -> @project_id = ObjectId().toString() @PackManager.MAX_COUNT = 512 - afterEach -> tk.reset() @@ -334,3 +333,33 @@ describe "PackManager", -> @callback.called.should.equal true it "should return with no error", -> @callback.calledWith(undefined).should.equal true + + describe "setTTLOnArchivedPack", -> + beforeEach -> + @pack_id = "somepackid" + @onedayinms = 86400000 + @db.docHistory = + findAndModify : sinon.stub().callsArgWith(1) + + it "should set expires to 1 day", (done)-> + @PackManager._getOneDayInFutureWithRandomDelay = sinon.stub().returns(@onedayinms) + @PackManager.setTTLOnArchivedPack @project_id, @doc_id, @pack_id, => + args = @db.docHistory.findAndModify.args[0][0] + args.query._id.should.equal @pack_id + args.update['$set'].expiresAt.should.equal @onedayinms + done() + + + describe "_getOneDayInFutureWithRandomDelay", -> + beforeEach -> + @onedayinms = 86400000 + @thirtyMins = 1000 * 60 * 30 + + it "should give 1 day + 30 mins random time", (done)-> + loops = 10000 + while --loops > 0 + randomDelay = @PackManager._getOneDayInFutureWithRandomDelay() - new Date(Date.now() + @onedayinms) + randomDelay.should.be.above(0) + randomDelay.should.be.below(@thirtyMins + 1) + done() + From ad5af5f4ddf3d1bbdc159896128267b7ec041fdc Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Thu, 26 Jan 2017 15:14:20 +0000 Subject: [PATCH 15/16] ceil not round --- services/track-changes/app/coffee/PackManager.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/track-changes/app/coffee/PackManager.coffee b/services/track-changes/app/coffee/PackManager.coffee index 15e7e8902a..98d4f6f274 100644 --- a/services/track-changes/app/coffee/PackManager.coffee +++ b/services/track-changes/app/coffee/PackManager.coffee @@ -534,5 +534,5 @@ module.exports = PackManager = _getOneDayInFutureWithRandomDelay: -> thirtyMins = 1000 * 60 * 30 - randomThirtyMinMax = Math.round(Math.random() * thirtyMins) + randomThirtyMinMax = Math.ceil(Math.random() * thirtyMins) return new Date(Date.now() + randomThirtyMinMax + 1*DAYS) From ac5d59211d3c1139619586f81e8a62395a2ad710 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Tue, 31 Jan 2017 15:07:58 +0000 Subject: [PATCH 16/16] revert random TTL in favour of delay in archiving there could be some issues with newer packs expiring before older ones --- .../app/coffee/PackManager.coffee | 10 ++-- .../PackManager/PackManagerTests.coffee | 48 +++++++++---------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/services/track-changes/app/coffee/PackManager.coffee b/services/track-changes/app/coffee/PackManager.coffee index 98d4f6f274..d60500ce56 100644 --- a/services/track-changes/app/coffee/PackManager.coffee +++ b/services/track-changes/app/coffee/PackManager.coffee @@ -526,13 +526,13 @@ module.exports = PackManager = setTTLOnArchivedPack: (project_id, doc_id, pack_id, callback) -> db.docHistory.findAndModify { query: {_id: pack_id} - update: {$set: {expiresAt: PackManager._getOneDayInFutureWithRandomDelay()}} + update: {$set: {expiresAt: new Date(Date.now() + 1*DAYS)}} }, (err) -> logger.log {project_id, doc_id, pack_id}, "set expiry on pack" callback() - _getOneDayInFutureWithRandomDelay: -> - thirtyMins = 1000 * 60 * 30 - randomThirtyMinMax = Math.ceil(Math.random() * thirtyMins) - return new Date(Date.now() + randomThirtyMinMax + 1*DAYS) +# _getOneDayInFutureWithRandomDelay: -> +# thirtyMins = 1000 * 60 * 30 +# randomThirtyMinMax = Math.ceil(Math.random() * thirtyMins) +# return new Date(Date.now() + randomThirtyMinMax + 1*DAYS) diff --git a/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee b/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee index c792381c6a..d1d7d898d6 100644 --- a/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee +++ b/services/track-changes/test/unit/coffee/PackManager/PackManagerTests.coffee @@ -334,32 +334,32 @@ describe "PackManager", -> it "should return with no error", -> @callback.calledWith(undefined).should.equal true - describe "setTTLOnArchivedPack", -> - beforeEach -> - @pack_id = "somepackid" - @onedayinms = 86400000 - @db.docHistory = - findAndModify : sinon.stub().callsArgWith(1) + # describe "setTTLOnArchivedPack", -> + # beforeEach -> + # @pack_id = "somepackid" + # @onedayinms = 86400000 + # @db.docHistory = + # findAndModify : sinon.stub().callsArgWith(1) - it "should set expires to 1 day", (done)-> - @PackManager._getOneDayInFutureWithRandomDelay = sinon.stub().returns(@onedayinms) - @PackManager.setTTLOnArchivedPack @project_id, @doc_id, @pack_id, => - args = @db.docHistory.findAndModify.args[0][0] - args.query._id.should.equal @pack_id - args.update['$set'].expiresAt.should.equal @onedayinms - done() + # it "should set expires to 1 day", (done)-> + # #@PackManager._getOneDayInFutureWithRandomDelay = sinon.stub().returns(@onedayinms) + # @PackManager.setTTLOnArchivedPack @project_id, @doc_id, @pack_id, => + # args = @db.docHistory.findAndModify.args[0][0] + # args.query._id.should.equal @pack_id + # args.update['$set'].expiresAt.should.equal @onedayinms + # done() - describe "_getOneDayInFutureWithRandomDelay", -> - beforeEach -> - @onedayinms = 86400000 - @thirtyMins = 1000 * 60 * 30 + # describe "_getOneDayInFutureWithRandomDelay", -> + # beforeEach -> + # @onedayinms = 86400000 + # @thirtyMins = 1000 * 60 * 30 - it "should give 1 day + 30 mins random time", (done)-> - loops = 10000 - while --loops > 0 - randomDelay = @PackManager._getOneDayInFutureWithRandomDelay() - new Date(Date.now() + @onedayinms) - randomDelay.should.be.above(0) - randomDelay.should.be.below(@thirtyMins + 1) - done() + # it "should give 1 day + 30 mins random time", (done)-> + # loops = 10000 + # while --loops > 0 + # randomDelay = @PackManager._getOneDayInFutureWithRandomDelay() - new Date(Date.now() + @onedayinms) + # randomDelay.should.be.above(0) + # randomDelay.should.be.below(@thirtyMins + 1) + # done()