support migration of project history keys to separate redis instance

2025-04-04 16:05:54 +00:00 · 2019-09-13 15:06:42 +01:00 · 2019-09-13 15:06:42 +01:00 · d0c5eb5698
commit d0c5eb5698
parent cd06b99df8
6 changed files with 222 additions and 7 deletions
--- a/services/document-updater/app/coffee/ProjectHistoryRedisManager.coffee
+++ b/services/document-updater/app/coffee/ProjectHistoryRedisManager.coffee
@ -1,6 +1,7 @@
 Settings = require('settings-sharelatex')
 projectHistoryKeys = Settings.redis?.project_history?.key_schema
-rclient = require("redis-sharelatex").createClient(Settings.redis.documentupdater)
+#rclient = require("redis-sharelatex").createClient(Settings.redis.project_history)
+rclient = require("./RedisMigrationManager").createClient(Settings.redis.project_history, Settings.redis.new_project_history)
 logger = require('logger-sharelatex')

 module.exports = ProjectHistoryRedisManager =
--- a/services/document-updater/app/coffee/RedisMigrationManager.coffee
+++ b/services/document-updater/app/coffee/RedisMigrationManager.coffee
@ -0,0 +1,199 @@
+logger = require "logger-sharelatex"
+Settings = require "settings-sharelatex"
+redis = require("redis-sharelatex")
+LockManager = require("./LockManager")
+async = require("async")
+
+# The aim is to migrate the project history queues
+# ProjectHistory:Ops:{project_id} from the existing redis to a new redis.
+#
+# This has to work in conjunction with changes in project history.
+#
+# The basic principles are:
+#
+# - project history is modified to read from an 'old' and 'new' queue. It reads
+#   from the 'old' queue first, and when that queue is empty it reads from the
+#   'new' queue.
+# - docupdater will migrate to writing to the 'new' queue when the 'old' queue
+#   is empty.
+#
+# Some facts about the update process:
+#
+# - project history has a lock on the project-id, so each queue is processed in
+#   isolation
+# - docupdaters take a lock on the doc_id but not the project_id, therefore
+#   multiple docupdaters can be appending to the queue for a project at the same
+#   time (provided the updates for individual docs are in order this is
+#   acceptable)
+# - as we want to do this without shutting down the site, we have to take into
+#   account that different versions of the code will be running while deploys
+#   are in progress.
+#
+# The migration has to be carried out with the following constraint:
+#
+# - a docupdater should never write to the "old" queue when there are updates in
+#   the "new" queue (there is a strict ordering on the versions, new > old)
+#
+# The deployment process for docupdater will be
+#
+# - add a project-level lock to the queuing in docupdater
+# - use a per-project migration flag to determine when to write to the new redis
+# - set the migration flag for projects with an empty queue in the old redis
+# - when all docupdaters respect the flag, make a new deploy which starts to set
+#   the flag
+# - when all docupdaters are setting the flag (and writing to the new redis),
+#   finish the migration by writing all data to the new redis
+#
+# Rollback
+#
+# Under the scheme above a project should only ever have data in the old redis
+# or the new redis, but never both at the same time.
+#
+# Two scenarios:
+#
+# Hard rollback
+#
+# If we want to roll back to the old redis immediately, we need to get the data
+# out of the new queues and back into the old queues, before appending to the
+# old queues again.  The actions to do this are:
+#
+#   - close the site
+#   - revert docupdater so it only writes to the original redis (there will now
+#     be some data in the new redis for some projects which we need to recover)
+#   - run a script to move the new queues back into the old redis
+#   - revert project history to only read from the original redis
+#
+# Graceful rollback
+#
+# If we are prepared to keep the new redis running, but not add new projects to
+# it we can do the following:
+#
+#  - deploy all docupdaters to update from the "switch" phase into the
+#    "rollback" phase (projects in the new redis will continue to send data
+#    there, projects not yet migrated will continue to go to the old redis)
+#  - deploy project history with the "old queue" pointing to the new redis and
+#    the "new queue" to the old redis, so that the queue in the new redis is
+#    cleared before the queue in the old redis is processed (i.e. add a
+#    rollback:true property in new_project_history in the project-history
+#    settings)
+#  - projects will now clear gradually from the new redis back to the old redis
+#  - get a list of all the projects in the new redis and flush them, which will
+#    cause the new queues to be cleared and the old redis to be used for those
+#    projects.
+
+# Extract the project id from a redis key: the 24 lowercase hex characters
+# wrapped in braces, e.g. "ProjectHistory:Ops:{<project_id>}".
+# Throws a TypeError when the key contains no such id (the match is null).
+getProjectId = (key) ->
+	matched = key.match(/\{([0-9a-f]{24})\}/)
+	matched[1]
+
+# Minimal stand-in for a redis multi. It records the queued commands and, on
+# exec, replays them in a real multi on whichever redis client the
+# MigrationClient selects for the queue key. Only rpush and setnx are
+# supported.
+class Multi
+	constructor: (@migrationClient) ->
+		@command_list = []
+		@queueKey = null
+	# Queue an rpush; its first argument is remembered as the queue key which
+	# exec uses to find the project id and the target redis.
+	rpush: (args...) ->
+		@queueKey = args[0]
+		@command_list.push { command:'rpush', args: args}
+	# Queue a setnx to be run in the same multi as the rpush.
+	setnx: (args...) ->
+		@command_list.push { command: 'setnx', args: args}
+	# Run the queued commands while holding the lock for the project.
+	# callback receives the arguments of the underlying multi.exec, or an error
+	# from locking, from choosing the client, or from releasing the lock.
+	exec: (callback) ->
+		# The project id comes from the queue key recorded by rpush. Report a
+		# missing or malformed key through the callback rather than throwing
+		# synchronously out of an asynchronous method.
+		unless @queueKey?
+			return callback(new Error("no queue key set, rpush must be called before exec"))
+		try
+			project_id = getProjectId(@queueKey)
+		catch keyError
+			return callback(keyError)
+		LockManager.getLock project_id, (error, lockValue) =>
+			return callback(error) if error?
+			# always release the lock before passing the outcome on; an error
+			# while releasing the lock is reported in place of the outcome
+			releaseLock = (args...) =>
+				LockManager.releaseLock project_id, lockValue, (lockError) ->
+					return callback(lockError) if lockError?
+					callback(args...)
+			# decide which client to use
+			@migrationClient.findQueue @queueKey, (err, rclient) =>
+				return releaseLock(err) if err?
+				multi = rclient.multi()
+				for entry in @command_list
+					multi[entry.command](entry.args...)
+				multi.exec releaseLock
+
+# Redis client wrapper used while the project history queues are moved from
+# the old redis to the new redis. For a given queue key it chooses which of the
+# two underlying clients should receive the write, based on the configured
+# migration phase and a per-project migration key stored in the new redis.
+class MigrationClient
+	# old_settings and new_settings are passed to redis.createClient to build
+	# the two clients. new_settings must also provide key_schema (including
+	# projectHistoryMigrationKey) and migration_phase.
+	# Throws if migration_phase is not one of the phases findQueue implements.
+	constructor: (old_settings, new_settings) ->
+		@rclient_old = redis.createClient(old_settings)
+		@rclient_new = redis.createClient(new_settings)
+		@new_key_schema = new_settings.key_schema
+		@migration_phase = new_settings.migration_phase
+		# Accept exactly the phases handled in findQueue, so that a
+		# misconfigured phase fails at startup rather than on every write.
+		throw new Error("invalid migration phase") unless @migration_phase in ['prepare', 'switch', 'rollback']
+
+	# Check, in series, whether the migration key exists in the new redis and
+	# whether the queue key exists in the new and in the old redis.
+	# Calls back with (err, migrationKeyExists, newQueueExists, oldQueueExists).
+	getMigrationStatus: (key, migrationKey, callback) ->
+		async.series [
+			(cb) => @rclient_new.exists migrationKey, cb
+			(cb) => @rclient_new.exists key, cb
+			(cb) => @rclient_old.exists key, cb
+		], (err, result) ->
+			return callback(err) if err?
+			migrationKeyExists = result[0] > 0
+			newQueueExists = result[1] > 0
+			oldQueueExists = result[2] > 0
+			callback(null, migrationKeyExists, newQueueExists, oldQueueExists)
+
+	# Choose the redis client which should receive writes for the queue `key`.
+	# Calls back with (err, rclient). Depending on the phase this may set or
+	# delete the project's migration key in the new redis.
+	findQueue: (key, callback) ->
+		project_id = getProjectId(key)
+		migrationKey = @new_key_schema.projectHistoryMigrationKey({project_id})
+
+		# The callback must be bound (=>) because it reads @migration_phase and
+		# the two clients from this instance.
+		@getMigrationStatus key, migrationKey, (err, migrationKeyExists, newQueueExists, oldQueueExists) =>
+			return callback(err) if err?
+			# In all cases, if the migration key exists we must always write to the
+			# new redis, unless we are rolling back.
+			if @migration_phase is "prepare"
+				# in this phase we prepare for the switch, when some docupdaters will
+				# start setting the migration flag.  We monitor the migration key and
+				# write to the new redis if the key is present, but we do not set the
+				# migration key. At this point no writes will be going into the new
+				# redis. When all the docupdaters are in the "prepare" phase we can
+				# begin deploying the "switch" phase.
+				if migrationKeyExists
+					logger.debug {project_id}, "using new client because migration key exists"
+					return callback(null, @rclient_new)
+				else
+					logger.debug {project_id}, "using old client because migration key does not exist"
+					return callback(null, @rclient_old)
+			else if @migration_phase is "switch"
+				# As we deploy the "switch" phase new docupdaters will set the migration
+				# flag for projects which have an empty queue in the old redis, and
+				# write updates into the new redis.  Existing docupdaters still in the
+				# "prepare" phase will pick up the migration flag and write new updates
+				# into the new redis when appropriate.  When this deploy is complete
+				# writes will be going into the new redis for projects with an empty
+				# queue in the old redis.  We have to remain in the switch phase until
+				# all projects are flushed from the old redis.
+				if migrationKeyExists
+					logger.debug {project_id}, "using new client because migration key exists"
+					return callback(null, @rclient_new)
+				else
+					if oldQueueExists
+						logger.debug {project_id}, "using old client because old queue exists"
+						return callback(null, @rclient_old)
+					else
+						@rclient_new.setnx migrationKey, "NEW", (err) =>
+							return callback(err) if err?
+							logger.debug {key: key}, "switching to new redis because old queue is empty"
+							return callback(null, @rclient_new)
+			else if @migration_phase is "rollback"
+				# If we need to roll back gracefully we do the opposite of the "switch"
+				# phase. We use the new redis when the migration key is set and the
+				# queue exists in the new redis, but if the queue in the new redis is
+				# empty we delete the migration key and send further updates to the old
+				# redis.
+				if migrationKeyExists
+					if newQueueExists
+						logger.debug {project_id}, "using new client because migration key exists and new queue is present"
+						return callback(null, @rclient_new)
+					else
+						@rclient_new.del migrationKey, (err) =>
+							return callback(err) if err?
+							logger.debug {key: key}, "switching to old redis in rollback phase because new queue is empty"
+							return callback(null, @rclient_old)
+				else
+					logger.debug {project_id}, "using old client because migration key does not exist"
+					return callback(null, @rclient_old)
+			else
+				# the constructor rejects other phases; kept as a safeguard
+				logger.error {key: key}, "unknown migration phase"
+				callback(new Error('invalid migration phase'))
+
+	# Return a Multi which queues commands and runs them on the client chosen
+	# by findQueue.
+	multi: () ->
+		new Multi(@)
+
+# Public entry point of the module: build a MigrationClient, forwarding all
+# arguments unchanged to its constructor.
+module.exports = RedisMigrationManager =
+	createClient: (clientSettings...) ->
+		return new MigrationClient(clientSettings...)
--- a/services/document-updater/config/settings.defaults.coffee
+++ b/services/document-updater/config/settings.defaults.coffee
@ -45,6 +45,18 @@ module.exports =
 				projectHistoryOps: ({project_id}) -> "ProjectHistory:Ops:{#{project_id}}"
 				projectHistoryFirstOpTimestamp: ({project_id}) -> "ProjectHistory:FirstOpTimestamp:{#{project_id}}"

+		# Settings for the new redis instance which the project history queues are
+		# being migrated to.
+		new_project_history:
+			port: process.env["NEW_HISTORY_REDIS_PORT"] or "6379"
+			# note: no fallback value is given for the host
+			host: process.env["NEW_HISTORY_REDIS_HOST"]
+			password: process.env["NEW_HISTORY_REDIS_PASSWORD"] or ""
+			key_schema:
+				projectHistoryOps: ({project_id}) -> "ProjectHistory:Ops:{#{project_id}}"
+				projectHistoryFirstOpTimestamp: ({project_id}) -> "ProjectHistory:FirstOpTimestamp:{#{project_id}}"
+				# per-project key used to flag that a project is using the new redis
+				projectHistoryMigrationKey: ({project_id}) -> "ProjectHistory:MigrationKey:{#{project_id}}"
+			# current phase of the migration
+			migration_phase: "prepare"
+			redisOptions:
+				keepAlive: 100
+
 		lock:
 			port: process.env["LOCK_REDIS_PORT"] or process.env["REDIS_PORT"] or "6379"
 			host: process.env["LOCK_REDIS_HOST"] or process.env["REDIS_HOST"] or "localhost"
--- a/services/document-updater/docker-compose.yml
+++ b/services/document-updater/docker-compose.yml
@ -25,6 +25,7 @@ services:
    environment:
      ELASTIC_SEARCH_DSN: es:9200
      REDIS_HOST: redis
+      NEW_HISTORY_REDIS_HOST: new_redis
      MONGO_HOST: mongo
      POSTGRES_HOST: postgres
      MOCHA_GREP: ${MOCHA_GREP}
@ -34,10 +35,9 @@ services:
    depends_on:
      - mongo
      - redis
+      - new_redis
    command: npm run test:acceptance

-
-
  tar:
    build: .
    image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
@ -49,7 +49,8 @@ services:
  redis:
    image: redis

+  new_redis:
+    image: redis
+
  mongo:
    image: mongo:3.4
-
-
--- a/services/document-updater/test/acceptance/coffee/helpers/DocUpdaterApp.coffee
+++ b/services/document-updater/test/acceptance/coffee/helpers/DocUpdaterApp.coffee
@ -13,8 +13,8 @@ module.exports =
 		else
 			@initing = true
 			@callbacks.push callback
-			app.listen 3003, "localhost", (error) => 
+			app.listen 3003, "localhost", (error) =>
 				throw error if error?
 				@running = true
 				for callback in @callbacks
-					callback()
+					callback()
--- a/services/document-updater/test/unit/coffee/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.coffee
+++ b/services/document-updater/test/unit/coffee/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.coffee
@ -24,6 +24,8 @@ describe "ProjectHistoryRedisManager", ->
 				}
 				"redis-sharelatex":
 					createClient: () => @rclient
+				"./RedisMigrationManager":
+					createClient: () => @rclient
 				"logger-sharelatex":
 					log:->
 			globals: