/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS103: Rewrite code to no longer use __guard__
 * DS205: Consider reworking code to avoid use of IIFEs
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
let fiveMinutes, UpdatesManager;
const MongoManager = require("./MongoManager");
const PackManager = require("./PackManager");
const RedisManager = require("./RedisManager");
const UpdateCompressor = require("./UpdateCompressor");
const LockManager = require("./LockManager");
const WebApiManager = require("./WebApiManager");
const UpdateTrimmer = require("./UpdateTrimmer");
const logger = require("logger-sharelatex");
const async = require("async");
const _ = require("underscore");
const Settings = require("settings-sharelatex");
const keys = Settings.redis.lock.key_schema;
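// UpdatesManager pulls raw ShareJS updates for a doc out of Redis, compresses
// them, and stores them in Mongo "packs" via PackManager; it also provides
// query helpers that decorate stored updates with user info from the web API.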
module.exports = (UpdatesManager = {
  compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, callback) {
    let i;
    if (callback == null) { callback = function(error) {}; }
    const { length } = rawUpdates;
    if (length === 0) {
      return callback();
    }
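    // Each raw update is roughly of this shape (ShareJS text-op format, shown
    // here for illustration only, not exhaustive):
    //   { v: 42, op: [{ i: "inserted text", p: 0 }, { d: "deleted text", p: 10 }],
    //     meta: { start_ts, end_ts, user_id } }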
    // check that ops are in the correct order
    for (i = 0; i < rawUpdates.length; i++) {
      const op = rawUpdates[i];
      if (i > 0) {
        const thisVersion = op != null ? op.v : undefined;
        const prevVersion = __guard__(rawUpdates[i - 1], x => x.v);
        if (!(prevVersion < thisVersion)) {
          logger.error({ project_id, doc_id, rawUpdates, temporary, thisVersion, prevVersion }, "op versions out of order");
        }
      }
    }
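    // Note: out-of-order versions are only logged here; the consistency check
    // against lastVersion below is what actually rejects bad updates.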
    // FIXME: we no longer need the lastCompressedUpdate, so change functions not to need it
    // CORRECTION: we do use it to log the time in case of error
    return MongoManager.peekLastCompressedUpdate(doc_id, function(error, lastCompressedUpdate, lastVersion) {
      // lastCompressedUpdate is the most recent update in Mongo, and
      // lastVersion is its sharejs version number.
      //
      // The peekLastCompressedUpdate method may pass the update back
      // as 'null' (for example if the previous compressed update has
      // been archived). In this case it can still pass back the
      // lastVersion from the update to allow us to check consistency.
      if (error != null) { return callback(error); }
      // Ensure that raw updates start where lastVersion left off
      if (lastVersion != null) {
        const discardedUpdates = [];
        rawUpdates = rawUpdates.slice(0);
        while ((rawUpdates[0] != null) && (rawUpdates[0].v <= lastVersion)) {
          discardedUpdates.push(rawUpdates.shift());
        }
        if (discardedUpdates.length) {
          logger.error({ project_id, doc_id, discardedUpdates, temporary, lastVersion }, "discarded updates already present");
        }
        if ((rawUpdates[0] != null) && (rawUpdates[0].v !== (lastVersion + 1))) {
          const ts = __guard__(lastCompressedUpdate != null ? lastCompressedUpdate.meta : undefined, x1 => x1.end_ts);
          const last_timestamp = (ts != null) ? new Date(ts) : 'unknown time';
          error = new Error(`Tried to apply raw op at version ${rawUpdates[0].v} to last compressed update with version ${lastVersion} from ${last_timestamp}`);
          logger.error({ err: error, doc_id, project_id, prev_end_ts: ts, temporary, lastCompressedUpdate }, "inconsistent doc versions");
          if ((Settings.trackchanges != null ? Settings.trackchanges.continueOnError : undefined) && (rawUpdates[0].v > (lastVersion + 1))) {
            // we have lost some ops - continue to write into the database, we can't recover at this point
            lastCompressedUpdate = null;
          } else {
            return callback(error);
          }
        }
      }
      if (rawUpdates.length === 0) {
        return callback();
      }
      // Some old large ops still in Redis need to be rejected; they predate
      // the size limit that now prevents such ops entering the system.
      const REJECT_LARGE_OP_SIZE = 4 * 1024 * 1024;
      for (const rawUpdate of rawUpdates) {
        const opSizes = ((rawUpdate != null ? rawUpdate.op : undefined) || []).map(
          op => (op.i != null ? op.i.length : undefined) || (op.d != null ? op.d.length : undefined)
        );
        const size = _.max(opSizes);
        if (size > REJECT_LARGE_OP_SIZE) {
          error = new Error(`dropped op exceeding maximum allowed size of ${REJECT_LARGE_OP_SIZE}`);
          logger.error({ err: error, doc_id, project_id, size, rawUpdate }, "dropped op - too big");
          rawUpdate.op = [];
        }
      }
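      // Compress the surviving raw updates into a (usually smaller) list of
      // compressed ops; passing null as the first argument presumably means
      // "no previous update to merge into" (see UpdateCompressor for details).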
      const compressedUpdates = UpdateCompressor.compressRawUpdates(null, rawUpdates);
      return PackManager.insertCompressedUpdates(project_id, doc_id, lastCompressedUpdate, compressedUpdates, temporary, function(error, result) {
        if (error != null) { return callback(error); }
        if (result != null) { logger.log({ project_id, doc_id, orig_v: (lastCompressedUpdate != null ? lastCompressedUpdate.v : undefined), new_v: result.v }, "inserted updates into pack"); }
        return callback();
      });
    });
  },
  // Check whether the updates are temporary (per-project property)
  _prepareProjectForUpdates(project_id, callback) {
    if (callback == null) { callback = function(error, temporary) {}; }
    return UpdateTrimmer.shouldTrimUpdates(project_id, function(error, temporary) {
      if (error != null) { return callback(error); }
      return callback(null, temporary);
    });
  },
  // Check for project id on document history (per-document property)
  _prepareDocForUpdates(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return MongoManager.backportProjectId(project_id, doc_id, function(error) {
      if (error != null) { return callback(error); }
      return callback(null);
    });
  },
  // Apply updates for a specific project/doc after preparing at the project and doc level
  REDIS_READ_BATCH_SIZE: 100,
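  // Updates are read and flushed in batches of REDIS_READ_BATCH_SIZE;
  // processUncompressedUpdates re-schedules itself (below) until a batch comes
  // back shorter than the batch size, i.e. until Redis is drained for the doc.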
  processUncompressedUpdates(project_id, doc_id, temporary, callback) {
    // get the updates as strings from redis (so we can delete them after they are applied)
    if (callback == null) { callback = function(error) {}; }
    return RedisManager.getOldestDocUpdates(doc_id, UpdatesManager.REDIS_READ_BATCH_SIZE, function(error, docUpdates) {
      if (error != null) { return callback(error); }
      const { length } = docUpdates;
      // parse the redis strings into ShareJs updates
      return RedisManager.expandDocUpdates(docUpdates, function(error, rawUpdates) {
        if (error != null) {
          logger.err({ project_id, doc_id, docUpdates }, "failed to parse docUpdates");
          return callback(error);
        }
        logger.log({ project_id, doc_id, rawUpdates }, "retrieved raw updates from redis");
        return UpdatesManager.compressAndSaveRawUpdates(project_id, doc_id, rawUpdates, temporary, function(error) {
          if (error != null) { return callback(error); }
          logger.log({ project_id, doc_id }, "compressed and saved doc updates");
          // delete the applied updates from redis
          return RedisManager.deleteAppliedDocUpdates(project_id, doc_id, docUpdates, function(error) {
            if (error != null) { return callback(error); }
            if (length === UpdatesManager.REDIS_READ_BATCH_SIZE) {
              // There might be more updates
              logger.log({ project_id, doc_id }, "continuing processing updates");
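              // Deferring with setTimeout(..., 0) yields to the event loop
              // between batches, so a doc with a long backlog of updates does
              // not monopolise the process.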
              return setTimeout(() => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, callback), 0);
            } else {
              logger.log({ project_id, doc_id }, "all raw updates processed");
              return callback();
            }
          });
        });
      });
    });
  },
  // Process updates for a doc when we flush it individually
  processUncompressedUpdatesWithLock(project_id, doc_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
      if (error != null) { return callback(error); }
      return UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback);
    });
  },
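  // Locking note: _processUncompressedUpdatesForDocWithLock (below) wraps the
  // flush in a per-doc history lock, so only one process compresses a given
  // doc's updates at a time; releaseLock doubles as the completion callback.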
  // Process updates for a doc when the whole project is flushed (internal method)
  _processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, callback) {
    if (callback == null) { callback = function(error) {}; }
    return UpdatesManager._prepareDocForUpdates(project_id, doc_id, function(error) {
      if (error != null) { return callback(error); }
      return LockManager.runWithLock(
        keys.historyLock({ doc_id }),
        releaseLock => UpdatesManager.processUncompressedUpdates(project_id, doc_id, temporary, releaseLock),
        callback
      );
    });
  },
  // Process all updates for a project, checking project-level information only once
  processUncompressedUpdatesForProject(project_id, callback) {
    if (callback == null) { callback = function(error) {}; }
    return RedisManager.getDocIdsWithHistoryOps(project_id, function(error, doc_ids) {
      if (error != null) { return callback(error); }
      return UpdatesManager._prepareProjectForUpdates(project_id, function(error, temporary) {
        if (error != null) { return callback(error); }
        const jobs = [];
        for (const doc_id of doc_ids) {
          jobs.push(cb => UpdatesManager._processUncompressedUpdatesForDocWithLock(project_id, doc_id, temporary, cb));
        }
        return async.parallelLimit(jobs, 5, callback);
      });
    });
  },
  // Flush all outstanding changes; a negative limit flushes every project
  // with pending history ops, otherwise at most `limit` randomly-chosen
  // projects are flushed per call.
  flushAll(limit, callback) {
    if (callback == null) { callback = function(error, result) {}; }
    return RedisManager.getProjectIdsWithHistoryOps(function(error, project_ids) {
      if (error != null) { return callback(error); }
      logger.log({ count: (project_ids != null ? project_ids.length : undefined), project_ids }, "found projects");
      const jobs = [];
      project_ids = _.shuffle(project_ids); // randomise to avoid hitting same projects each time
      const selectedProjects = limit < 0 ? project_ids : project_ids.slice(0, limit);
      for (const project_id of selectedProjects) {
        jobs.push(cb =>
          UpdatesManager.processUncompressedUpdatesForProject(project_id, err => cb(null, { failed: (err != null), project_id }))
        );
      }
      return async.series(jobs, function(error, result) {
        if (error != null) { return callback(error); }
        const failedProjects = result.filter(x => x.failed).map(x => x.project_id);
        const succeededProjects = result.filter(x => !x.failed).map(x => x.project_id);
        return callback(null, { failed: failedProjects, succeeded: succeededProjects, all: project_ids });
      });
    });
  },
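  // "Dangling" updates are doc-level history ops in Redis whose doc id no
  // longer appears under any project's history-ops set, so a project-level
  // flush would never pick them up.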
  getDanglingUpdates(callback) {
    if (callback == null) { callback = function(error, doc_ids) {}; }
    return RedisManager.getAllDocIdsWithHistoryOps(function(error, all_doc_ids) {
      if (error != null) { return callback(error); }
      return RedisManager.getProjectIdsWithHistoryOps(function(error, all_project_ids) {
        if (error != null) { return callback(error); }
        // function to get doc_ids for each project
        const task = cb => async.concatSeries(all_project_ids, RedisManager.getDocIdsWithHistoryOps, cb);
        // find the dangling doc ids
        return task(function(error, project_doc_ids) {
          if (error != null) { return callback(error); }
          const dangling_doc_ids = _.difference(all_doc_ids, project_doc_ids);
          logger.log({ all_doc_ids, all_project_ids, project_doc_ids, dangling_doc_ids }, "checking for dangling doc ids");
          return callback(null, dangling_doc_ids);
        });
      });
    });
  },
  getDocUpdates(project_id, doc_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    return UpdatesManager.processUncompressedUpdatesWithLock(project_id, doc_id, function(error) {
      if (error != null) { return callback(error); }
      //console.log "options", options
      return PackManager.getOpsByVersionRange(project_id, doc_id, options.from, options.to, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, updates);
      });
    });
  },
  getDocUpdatesWithUserInfo(project_id, doc_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    return UpdatesManager.getDocUpdates(project_id, doc_id, options, function(error, updates) {
      if (error != null) { return callback(error); }
      return UpdatesManager.fillUserInfo(updates, function(error, updates) {
        if (error != null) { return callback(error); }
        return callback(null, updates);
      });
    });
  },
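  // Walk backwards through the project's packs (newest first), summarizing
  // updates until at least options.min_count summaries have been built; when
  // more history remains, nextBeforeTimestamp is passed as the last callback
  // argument, to be supplied as options.before on the next call (pagination).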
  getSummarizedProjectUpdates(project_id, options, callback) {
    if (options == null) { options = {}; }
    if (callback == null) { callback = function(error, updates) {}; }
    if (!options.min_count) { options.min_count = 25; }
    let summarizedUpdates = [];
    const { before } = options;
    let nextBeforeTimestamp = null;
    return UpdatesManager.processUncompressedUpdatesForProject(project_id, function(error) {
      if (error != null) { return callback(error); }
      return PackManager.makeProjectIterator(project_id, before, function(err, iterator) {
        if (err != null) { return callback(err); }
        // repeatedly get updates and pass them through the summariser to get a final output with user info
        return async.whilst(
          () =>
            //console.log "checking iterator.done", iterator.done()
            (summarizedUpdates.length < options.min_count) && !iterator.done(),
          cb =>
            iterator.next(function(err, partialUpdates) {
              if (err != null) { return callback(err); }
              //logger.log {partialUpdates}, 'got partialUpdates'
              if (partialUpdates.length === 0) { return cb(); } // FIXME: should try to avoid this happening
              nextBeforeTimestamp = partialUpdates[partialUpdates.length - 1].meta.end_ts;
              // add the updates to the summary list
              summarizedUpdates = UpdatesManager._summarizeUpdates(partialUpdates, summarizedUpdates);
              return cb();
            }),
          () =>
            // finally done all updates
            //console.log 'summarized Updates', summarizedUpdates
            UpdatesManager.fillSummarizedUserInfo(summarizedUpdates, function(err, results) {
              if (err != null) { return callback(err); }
              return callback(null, results, !iterator.done() ? nextBeforeTimestamp : undefined);
            })
        );
      });
    });
  },
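  // Fetch user info from the web API for each user id key in `users`,
  // returning a map of user_id -> userInfo; requests run in series.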
  fetchUserInfo(users, callback) {
    if (callback == null) { callback = function(error, fetchedUserInfo) {}; }
    const jobs = [];
    const fetchedUserInfo = {};
    for (const user_id in users) {
      jobs.push(callback =>
        WebApiManager.getUserInfo(user_id, function(error, userInfo) {
          if (error != null) { return callback(error); }
          fetchedUserInfo[user_id] = userInfo;
          return callback();
        })
      );
    }
    return async.series(jobs, function(err) {
      if (err != null) { return callback(err); }
      return callback(null, fetchedUserInfo);
    });
  },
  fillUserInfo(updates, callback) {
    let update, user_id;
    if (callback == null) { callback = function(error, updates) {}; }
    const users = {};
    for (update of updates) {
      ({ user_id } = update.meta);
      if (UpdatesManager._validUserId(user_id)) {
        users[user_id] = true;
      }
    }
    return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
      if (error != null) { return callback(error); }
      for (update of updates) {
        ({ user_id } = update.meta);
        delete update.meta.user_id;
        if (UpdatesManager._validUserId(user_id)) {
          update.meta.user = fetchedUserInfo[user_id];
        }
      }
      return callback(null, updates);
    });
  },
  fillSummarizedUserInfo(updates, callback) {
    let update, user_id, user_ids;
    if (callback == null) { callback = function(error, updates) {}; }
    const users = {};
    for (update of updates) {
      user_ids = update.meta.user_ids || [];
      for (user_id of user_ids) {
        if (UpdatesManager._validUserId(user_id)) {
          users[user_id] = true;
        }
      }
    }
    return UpdatesManager.fetchUserInfo(users, function(error, fetchedUserInfo) {
      if (error != null) { return callback(error); }
      for (update of updates) {
        user_ids = update.meta.user_ids || [];
        update.meta.users = [];
        delete update.meta.user_ids;
        for (user_id of user_ids) {
          if (UpdatesManager._validUserId(user_id)) {
            update.meta.users.push(fetchedUserInfo[user_id]);
          } else {
            update.meta.users.push(null);
          }
        }
      }
      return callback(null, updates);
    });
  },
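  // A valid user id is a 24-character hex string, i.e. the string form of a
  // Mongo ObjectId; the callers above skip ids that fail this check.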
  _validUserId(user_id) {
    if (user_id == null) {
      return false;
    } else {
      return !!user_id.match(/^[a-f0-9]{24}$/);
    }
  },
  TIME_BETWEEN_DISTINCT_UPDATES: (fiveMinutes = 5 * 60 * 1000),
  SPLIT_ON_DELETE_SIZE: 16, // characters
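  // A summarized update produced below has roughly this shape (illustrative,
  // derived from the code that builds it):
  //   { meta: { user_ids: [...], start_ts, end_ts },
  //     docs: { [doc_id]: { fromV, toV } } }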
  _summarizeUpdates(updates, existingSummarizedUpdates) {
    if (existingSummarizedUpdates == null) { existingSummarizedUpdates = []; }
    const summarizedUpdates = existingSummarizedUpdates.slice();
    let previousUpdateWasBigDelete = false;
    for (const update of updates) {
      let doc_id;
      const earliestUpdate = summarizedUpdates[summarizedUpdates.length - 1];
      let shouldConcat = false;
      // If a user inserts some text, then deletes a big chunk including that text,
      // the summary might concat the insert and delete, leaving no sign that the
      // insert ever happened, and no way to restore to it (restoring after a big
      // delete is common). So we split the summary on 'big' deletes. However, we're
      // stepping backwards in time with the most recent changes considered first,
      // so if this update is a big delete we want to start a new summarized update
      // next time; hence we track the previous update.
      if (previousUpdateWasBigDelete) {
        shouldConcat = false;
      } else if (earliestUpdate && ((earliestUpdate.meta.end_ts - update.meta.start_ts) < this.TIME_BETWEEN_DISTINCT_UPDATES)) {
        // We're going backwards in time through the updates, so only combine if this
        // update starts less than 5 minutes before the end of the current summarized
        // block, so no block spans more than 5 minutes.
        shouldConcat = true;
      }
      let isBigDelete = false;
      for (const op of update.op || []) {
        if ((op.d != null) && (op.d.length > this.SPLIT_ON_DELETE_SIZE)) {
          isBigDelete = true;
        }
      }
      previousUpdateWasBigDelete = isBigDelete;
      if (shouldConcat) {
        // check if the user in this update is already present in the earliest update,
        // if not, add them to the users list of the earliest update
        earliestUpdate.meta.user_ids = _.union(earliestUpdate.meta.user_ids, [update.meta.user_id]);
        doc_id = update.doc_id.toString();
        const doc = earliestUpdate.docs[doc_id];
        if (doc != null) {
          doc.fromV = Math.min(doc.fromV, update.v);
          doc.toV = Math.max(doc.toV, update.v);
        } else {
          earliestUpdate.docs[doc_id] = {
            fromV: update.v,
            toV: update.v
          };
        }
        earliestUpdate.meta.start_ts = Math.min(earliestUpdate.meta.start_ts, update.meta.start_ts);
        earliestUpdate.meta.end_ts = Math.max(earliestUpdate.meta.end_ts, update.meta.end_ts);
      } else {
        const newUpdate = {
          meta: {
            user_ids: [],
            start_ts: update.meta.start_ts,
            end_ts: update.meta.end_ts
          },
          docs: {}
        };

        newUpdate.docs[update.doc_id.toString()] = {
          fromV: update.v,
          toV: update.v
        };
        newUpdate.meta.user_ids.push(update.meta.user_id);
        summarizedUpdates.push(newUpdate);
      }
    }
    return summarizedUpdates;
  }
});
function __guard__(value, transform) {
  return (typeof value !== 'undefined' && value !== null) ? transform(value) : undefined;
}