diff --git a/release_tester/arangodb/sh.py b/release_tester/arangodb/sh.py
index a577f8de..9073e8f3 100644
--- a/release_tester/arangodb/sh.py
+++ b/release_tester/arangodb/sh.py
@@ -245,12 +245,14 @@ def hotbackup_create_nonbackup_data(self, suff=""):
         if (!arango.isConnected()) {{
             throw new Error('connecting the database failed');
         }}
+        print("connection is established, creating this_collection_will_not_be_backed_up{suff}");
         db._create("this_collection_will_not_be_backed_up{suff}");
+        print("now saving a document to this collection");
         db.this_collection_will_not_be_backed_up{suff}.save(
             {{"this": "document will be gone"}});
         """
         logging.debug("script to be executed: " + str(js_script_string) + str(self.connect_instance))
-        res = self.run_command(["create volatile data", js_script_string], True)  # self.cfg.verbose)
+        res = self.run_command(["create volatile data", js_script_string], True, progressive_timeout=900)  # self.cfg.verbose)
         logging.debug("data create result: " + str(res))
         if not res:
diff --git a/release_tester/arangodb/starter/deployments/activefailover.py b/release_tester/arangodb/starter/deployments/activefailover.py
index 6a99bf85..3355af25 100644
--- a/release_tester/arangodb/starter/deployments/activefailover.py
+++ b/release_tester/arangodb/starter/deployments/activefailover.py
@@ -337,8 +337,10 @@ def jam_attempt_impl(self):
         args = ["--skip", "802_"]
         self.checkdata_args = args
         ret = curr_leader.arangosh.check_test_data(
-            "checking active failover new leader node", True, args, log_debug=True
-        )
+            "checking active failover new leader node",
+            True,
+            args,
+            log_debug=True)
         if not ret[0]:
             raise Exception("check data failed " + ret[1])
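The progressive_timeout=900 added above gives the long-running arangosh script a hard deadline; run_command's actual timeout handling lives elsewhere in sh.py and is not part of this diff. As a rough illustration only, a deadline-guarded command runner could look like this (hypothetical helper, not the repo's implementation):

    import subprocess

    def run_with_deadline(cmd, progressive_timeout=900):
        """Run cmd, failing loudly if it exceeds the deadline (seconds)."""
        try:
            # capture output so the caller can log what arangosh printed
            return subprocess.run(cmd, check=True, capture_output=True, timeout=progressive_timeout)
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f"{cmd!r} did not finish within {progressive_timeout}s") from exc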
diff --git a/release_tester/arangodb/starter/deployments/cluster.py b/release_tester/arangodb/starter/deployments/cluster.py
index c64c0dce..87d70047 100644
--- a/release_tester/arangodb/starter/deployments/cluster.py
+++ b/release_tester/arangodb/starter/deployments/cluster.py
@@ -88,6 +88,7 @@ def __init__(
         if ver_found < len(versions):
             print("One deployment doesn't support starters with more nodes!")
             self.props.cluster_nodes = 3
+        self.backup_instance_count = self.props.cluster_nodes

     def starter_prepare_env_impl(self, sm=None, more_opts=None):
         # pylint: disable=invalid-name
@@ -146,7 +147,9 @@ def add_starter(name, port, opts, sm, hasAgency):
             self.create_tls_ca_cert()
         port = 9528
         count = 0
-        for this_node in list(range(1, self.props.cluster_nodes + 1)):
+        # we need 2 additional nodes for hotbackup testing
+        full_node_count = self.props.cluster_nodes + 2 if self.hot_backup else self.props.cluster_nodes
+        for this_node in list(range(1, full_node_count + 1)):
             node = []
             node_opts.append(node)
             if this_node != 1:
@@ -158,23 +161,22 @@ def add_starter(name, port, opts, sm, hasAgency):
             add_starter(f"node{this_node}", port, node + common_opts, sm, count < 3)
             port += 100
             count += 1
-        self.backup_instance_count = count
         for instance in self.starter_instances:
             instance.is_leader = True

     def starter_run_impl(self):
         lh.subsection("instance setup")
-        for manager in self.starter_instances:
+        for manager in self.starter_instances[: self.props.cluster_nodes]:
             logging.info("Spawning instance")
             manager.run_starter()

         logging.info("waiting for the starters to become alive")
-        not_started = self.starter_instances[:]  # This is a explicit copy
+        not_running = self.get_running_starters()  # This is an explicit copy
         count = 0
-        while not_started:
-            logging.debug("waiting for mananger with logfile:" + str(not_started[-1].log_file))
-            if not_started[-1].is_instance_up():
-                not_started.pop()
+        while not_running:
+            logging.debug("waiting for manager with logfile:" + str(not_running[-1].log_file))
+            if not_running[-1].is_instance_up():
+                not_running.pop()
             progress(".")
             time.sleep(1)
             count += 1
@@ -182,20 +184,25 @@ def starter_run_impl(self):
             raise Exception("Cluster installation didn't come up in two minutes!")

         logging.info("waiting for the cluster instances to become alive")
-        for node in self.starter_instances:
+        for node in self.get_running_starters():
             node.detect_instances()
             node.detect_instance_pids()
             # self.cfg.add_frontend('http', self.cfg.publicip, str(node.get_frontend_port()))

         logging.info("instances are ready - JWT: " + self.starter_instances[0].get_jwt_header())
         count = 0
-        for node in self.starter_instances:
+        for node in self.get_running_starters():
             node.set_passvoid("cluster", count == 0)
             count += 1
+        for node in self.get_not_running_starters():
+            node.set_passvoid("cluster", False)
         self.passvoid = "cluster"
+        self.cfg.passvoid = self.passvoid
+        if self.new_cfg:
+            self.new_cfg.passvoid = self.passvoid

     def finish_setup_impl(self):
-        self.makedata_instances = self.starter_instances[:]
+        self.makedata_instances = self.get_running_starters()
         self.set_frontend_instances()

     def _check_for_shards_in_sync(self):
@@ -234,10 +241,10 @@ def upgrade_arangod_version_impl(self):
         if self.cfg.stress_upgrade:
             bench_instances.append(self.starter_instances[0].launch_arangobench("cluster_upgrade_scenario_1"))
             bench_instances.append(self.starter_instances[1].launch_arangobench("cluster_upgrade_scenario_2"))
-        for node in self.starter_instances:
+        for node in self.get_running_starters():
             node.replace_binary_for_upgrade(self.new_installer.cfg)

-        for node in self.starter_instances:
+        for node in self.get_running_starters():
             node.detect_instance_pids_still_alive()

         self.starter_instances[1].command_upgrade()
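With hot backup enabled, starter_prepare_env_impl now provisions cluster_nodes + 2 starters, but starter_run_impl launches only the regular cluster nodes; the two spares stay down until the hotbackup test scales the cluster out. The filtering relies on the is_running flag that manager.py sets in run_starter (see below). A toy illustration of the pattern (standalone sketch, not the repo's classes):

    class Node:
        """Stand-in for a StarterManager with an is_running flag."""
        def __init__(self, name):
            self.name = name
            self.is_running = False

    # provision two spares beyond the three active cluster nodes
    nodes = [Node(f"node{i}") for i in range(1, 6)]
    for node in nodes[:3]:  # only the regular cluster nodes get started
        node.is_running = True

    running = [n for n in nodes if n.is_running]       # like get_running_starters()
    spares = [n for n in nodes if not n.is_running]    # like get_not_running_starters()
    assert [n.name for n in spares] == ["node4", "node5"]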
@@ -491,12 +498,12 @@ def jam_attempt_impl(self):
         # After attempt of jamming, we have peer for nodeX in setup.json.
         # This peer will brake further updates because this peer is unavailable.
         # It is necessary to remove this peer from json for each starter instance
-        for instance in self.starter_instances:
+        for instance in self.get_running_starters():
             remove_node_x_from_json(instance.basedir)

     def shutdown_impl(self):
         ret = False
-        for node in self.starter_instances:
+        for node in self.get_running_starters():
             ret = ret or node.terminate_instance()
         logging.info("test ended")
         return ret
@@ -536,3 +543,103 @@ def generate_keyfile(self, keyfile):
                 "--host=localhost",
             ]
         )
+
+    # pylint: disable=too-many-statements
+    @step
+    def test_hotbackup_impl(self):
+        """test hotbackup feature: Cluster"""
+        with step("step 1: create a backup"):
+            backup_step_1 = self.create_backup_and_upload("thy_name_is_" + self.name)
+
+        with step("step 2: add new db server"):
+            old_servers = self.get_running_starters()
+            new_starter = self.get_not_running_starters()[-1]
+            new_starter.run_starter_and_wait()
+            self.backup_instance_count += 1
+            self.makedata_instances = self.get_running_starters()
+
+        with step("step 3: create a backup"):
+            backup_step_3 = self.create_backup_and_upload("thy_name_is_" + self.name + "_plus1_server")
+
+        with step("step 4: remove old db server"):
+            self.remove_starter_dbserver(old_servers[0])
+
+        with step("step 5: create another backup"):
+            self.create_backup_and_upload("thy_name_is_" + self.name + "_plus1_server_minus1_server", False)
+
+        with step("step 6: create non-backup data"):
+            self._check_for_shards_in_sync()
+            self.create_non_backup_data()
+            self.tcp_ping_all_nodes()
+
+        with step("step 7: download and restore backup from step 1"):
+            self.download_backup(backup_step_1)
+            self.validate_local_backup(backup_step_1)
+            backups = self.list_backup()
+            if backup_step_1 not in backups:
+                raise Exception("downloaded backup has different name? " + str(backups))
+            self.restore_backup(backup_step_1)
+            self.tcp_ping_all_nodes()
+
+        with step("step 8: check data"):
+            self.check_data_impl()
+            if not self.check_non_backup_data():
+                raise Exception("data created after backup is still there??")
+
+        with step("step 9: add new db server"):
+            new_starter2 = self.get_not_running_starters()[-1]
+            new_starter2.run_starter_and_wait()
+            self.backup_instance_count += 1
+            self.makedata_instances = self.get_running_starters()
+
+        with step("step 10: create non-backup data"):
+            self.create_non_backup_data()
+            self.tcp_ping_all_nodes()
+
+        with step("step 11: download and restore backup from step 3"):
+            self.download_backup(backup_step_3)
+            self.validate_local_backup(backup_step_3)
+            backups = self.list_backup()
+            if backup_step_3 not in backups:
+                raise Exception("downloaded backup has different name? " + str(backups))
+            self.restore_backup(backup_step_3)
+            self.tcp_ping_all_nodes()
+
+        with step("step 12: check data"):
+            self.check_data_impl()
+
+        with step("step 13: remove old db server"):
+            self.remove_starter_dbserver(old_servers[1])
+
+        with step("step 14: create non-backup data"):
+            self._check_for_shards_in_sync()
+            self.create_non_backup_data()
+            self.tcp_ping_all_nodes()
+
+    @step
+    def remove_starter_dbserver(self, starter):
+        """remove dbserver managed by given starter from cluster"""
+        print("removing starter " + repr(starter))
+        terminated_dbserver_uuid = starter.get_dbserver().get_uuid()
+        starter.stop_dbserver()
+        self.remove_server_from_agency(terminated_dbserver_uuid)
+        self.backup_instance_count -= 1
+        self.makedata_instances = self.get_running_starters()
+
+    @step
+    def test_hotbackup_after_upgrade_impl(self):
+        """test hotbackup after upgrade: cluster"""
+        with step("step 1: check data"):
+            self.check_data_impl()
+        with step("step 2: download backup"):
+            latest_backup = self.uploaded_backups[-1]
+            self.download_backup(latest_backup)
+            backups = self.list_backup()
+            if latest_backup not in backups:
+                raise Exception("downloaded backup has different name? " + str(backups))
+        with step("step 3: restore backup"):
+            self.restore_backup(latest_backup)
+            self.tcp_ping_all_nodes()
+            # we don't run checkdata after restore in this function, because it is run afterwards in runner.py
+        with step("step 4: delete backups"):
+            self.delete_all_backups()
" + str(backups)) + self.restore_backup(backup_step_3) + self.tcp_ping_all_nodes() + + with step("step 12: check data"): + self.check_data_impl() + + with step("step 13: remove old db server"): + self.remove_starter_dbserver(old_servers[1]) + + with step("step 14: create non-backup data"): + self._check_for_shards_in_sync() + self.create_non_backup_data() + self.tcp_ping_all_nodes() + + @step + def remove_starter_dbserver(self, starter): + """remove dbserver managed by given starter from cluster""" + print("removing starter " + repr(starter)) + terminated_dbserver_uuid = starter.get_dbserver().get_uuid() + starter.stop_dbserver() + self.remove_server_from_agency(terminated_dbserver_uuid) + self.backup_instance_count -= 1 + self.makedata_instances = self.get_running_starters() + + @step + def test_hotbackup_after_upgrade_impl(self): + """test hotbackup after upgrade: cluster""" + with step("step 1: check data"): + self.check_data_impl() + with step("step 2: download backup"): + latest_backup = self.uploaded_backups[-1] + self.download_backup(latest_backup) + backups = self.list_backup() + if latest_backup not in backups: + raise Exception("downloaded backup has different name? " + str(backups)) + with step("step 3: restore backup"): + self.restore_backup(latest_backup) + self.tcp_ping_all_nodes() + # we don't run checkdata after restore in this function, because it is ran afterwards by in runner.py + with step("step 4: delete backups"): + self.delete_all_backups() diff --git a/release_tester/arangodb/starter/deployments/cluster_perf.py b/release_tester/arangodb/starter/deployments/cluster_perf.py index 451c588e..099a3365 100644 --- a/release_tester/arangodb/starter/deployments/cluster_perf.py +++ b/release_tester/arangodb/starter/deployments/cluster_perf.py @@ -101,7 +101,9 @@ def __init__( runner_type, abort_on_error, installer_set, - RunnerProperties(rp, "CLUSTER", 400, 600, self.scenario.hot_backup, 6), + RunnerProperties( + rp, "CLUSTER", 400, 600, self.scenario.hot_backup, 6 + ), selenium, selenium_driver_args, selenium_include_suites, diff --git a/release_tester/arangodb/starter/deployments/dc2dc.py b/release_tester/arangodb/starter/deployments/dc2dc.py index a2adf394..b209134a 100644 --- a/release_tester/arangodb/starter/deployments/dc2dc.py +++ b/release_tester/arangodb/starter/deployments/dc2dc.py @@ -114,7 +114,9 @@ def __init__( runner_type, abort_on_error, installer_set, - RunnerProperties(rp, name, 0, 4500, True, 12), + RunnerProperties( + rp, name, 0, 4500, True, 12 + ), selenium, selenium_driver_args, selenium_include_suites, diff --git a/release_tester/arangodb/starter/deployments/leaderfollower.py b/release_tester/arangodb/starter/deployments/leaderfollower.py index 3cf9ad75..a35fc1a1 100644 --- a/release_tester/arangodb/starter/deployments/leaderfollower.py +++ b/release_tester/arangodb/starter/deployments/leaderfollower.py @@ -36,7 +36,9 @@ def __init__( runner_type, abort_on_error, installer_set, - RunnerProperties(rp, "LeaderFollower", 400, 500, False, 2), + RunnerProperties( + rp, "LeaderFollower", 400, 500, False, 2 + ), selenium, selenium_driver_args, selenium_include_suites, diff --git a/release_tester/arangodb/starter/deployments/none.py b/release_tester/arangodb/starter/deployments/none.py index 83721f06..35536fe5 100644 --- a/release_tester/arangodb/starter/deployments/none.py +++ b/release_tester/arangodb/starter/deployments/none.py @@ -24,7 +24,9 @@ def __init__( runner_type, abort_on_error, installer_set, - RunnerProperties(rp, "none", 0, 1, False, 1), + 
diff --git a/release_tester/arangodb/starter/deployments/runner.py b/release_tester/arangodb/starter/deployments/runner.py
index 5faa7b9e..e8af43f0 100644
--- a/release_tester/arangodb/starter/deployments/runner.py
+++ b/release_tester/arangodb/starter/deployments/runner.py
@@ -111,6 +111,7 @@ def __init__(
         self.old_installer = old_inst
         self.new_installer = new_inst
         self.backup_name = None
+        self.uploaded_backups = []
         self.hot_backup = (
             cfg.hot_backup_supported and properties.supports_hotbackup and self.old_installer.supports_hot_backup()
         )
@@ -275,34 +276,7 @@ def run(self):
                 "{0}{1} Deployment started. Please test the UI!".format((self.versionstr), str(self.name)),
             )
         if self.hot_backup:
-            self.progress(False, "TESTING HOTBACKUP")
-            self.backup_name = self.create_backup("thy_name_is_" + self.name)
-            self.validate_local_backup(self.backup_name)
-            self.tcp_ping_all_nodes()
-            self.create_non_backup_data()
-            taken_backups = self.list_backup()
-            backup_no = len(taken_backups) - 1
-            self.upload_backup(taken_backups[backup_no])
-            self.tcp_ping_all_nodes()
-            self.delete_backup(taken_backups[backup_no])
-            self.tcp_ping_all_nodes()
-            backups = self.list_backup()
-            if len(backups) != len(taken_backups) - 1:
-                raise Exception("expected backup to be gone, " "but its still there: " + str(backups))
-            self.download_backup(self.backup_name)
-            self.validate_local_backup(self.backup_name)
-            self.tcp_ping_all_nodes()
-            backups = self.list_backup()
-            if backups[len(backups) - 1] != self.backup_name:
-                raise Exception("downloaded backup has different name? " + str(backups))
-            self.before_backup()
-            self.restore_backup(backups[len(backups) - 1])
-            self.tcp_ping_all_nodes()
-            self.after_backup()
-            time.sleep(20)  # TODO fix
-            self.check_data_impl()
-            if not self.check_non_backup_data():
-                raise Exception("data created after backup is still there??")
+            self.test_hotbackup()
         if self.dump_restore:
             self.dump_everything("dump_this_" + self.name)
             print(self.backup_name)
@@ -311,8 +285,6 @@ def run(self):
         self.check_data_impl()

         if self.new_installer:
-            if self.hot_backup:
-                self.create_non_backup_data()
             self.versionstr = "NEW[" + self.new_cfg.version + "] "

             self.upgrade_counter += 1
@@ -341,34 +313,7 @@ def run(self):
             if self.is_minor_upgrade() and self.new_installer.supports_backup():
                 self.new_installer.check_backup_is_created()
             if self.hot_backup:
-                self.check_data_impl()
-                self.progress(False, "TESTING HOTBACKUP AFTER UPGRADE")
-                backups = self.list_backup()
-                self.upload_backup(backups[0])
-                self.tcp_ping_all_nodes()
-                self.delete_backup(backups[0])
-                self.tcp_ping_all_nodes()
-                backups = self.list_backup()
-                if len(backups) != 0:
-                    raise Exception("expected backup to be gone, " "but its still there: " + str(backups))
-                self.download_backup(self.backup_name)
-                self.validate_local_backup(self.backup_name)
-                self.tcp_ping_all_nodes()
-                backups = self.list_backup()
-                if backups[0] != self.backup_name:
-                    raise Exception("downloaded backup has different name? " + str(backups))
-                time.sleep(20)  # TODO fix
-                self.before_backup()
-                self.restore_backup(backups[0])
-                self.tcp_ping_all_nodes()
-                self.after_backup()
-                if not self.check_non_backup_data():
-                    raise Exception("data created after backup is still there??")
-                self.delete_backup(backups[0])
-                self.tcp_ping_all_nodes()
-                backups = self.list_backup()
-                if len(backups) != 0:
-                    raise Exception("expected backup to be gone, " "but its still there: " + str(backups))
+                self.test_hotbackup_after_upgrade()
         if self.dump_restore:
             print(self.backup_name)
             self.restore_everything(self.backup_name)
@@ -389,7 +334,7 @@ def run(self):
             self.check_data_impl()
         if not is_keep_db_dir:
             self.starter_shutdown()
-            for starter in self.starter_instances:
+            for starter in self.get_running_starters():
                 starter.detect_fatal_errors()
         if is_uninstall_now:
             self.uninstall(self.old_installer if not self.new_installer else self.new_installer)
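The @step decorator and the with step("..."): blocks used throughout this patch match allure-python's step API; assuming that is the library imported here (the import is not shown in this diff), both spellings hang off the same object:

    import allure

    @allure.step("create a backup")          # decorator form, as on test_hotbackup_impl
    def create_backup(name):
        return name

    def test_flow():
        with allure.step("step 1: create a backup"):   # context-manager form
            create_backup("thy_name_is_cluster")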
" + str(backups)) - time.sleep(20) # TODO fix - self.before_backup() - self.restore_backup(backups[0]) - self.tcp_ping_all_nodes() - self.after_backup() - if not self.check_non_backup_data(): - raise Exception("data created after backup is still there??") - self.delete_backup(backups[0]) - self.tcp_ping_all_nodes() - backups = self.list_backup() - if len(backups) != 0: - raise Exception("expected backup to be gone, " "but its still there: " + str(backups)) + self.test_hotbackup_after_upgrade() if self.dump_restore: print(self.backup_name) self.restore_everything(self.backup_name) @@ -389,7 +334,7 @@ def run(self): self.check_data_impl() if not is_keep_db_dir: self.starter_shutdown() - for starter in self.starter_instances: + for starter in self.get_running_starters(): starter.detect_fatal_errors() if is_uninstall_now: self.uninstall(self.old_installer if not self.new_installer else self.new_installer) @@ -411,6 +356,79 @@ def run(self): self.progress(False, "Runner of type {0} - Finished!".format(str(self.name))) + def test_hotbackup(self): + """test hotbackup""" + self.progress(False, "TESTING HOTBACKUP") + self.test_hotbackup_impl() + + def test_hotbackup_after_upgrade(self): + """test hotbackup after upgrade""" + self.progress(False, "TESTING HOTBACKUP AFTER UPGRADE") + self.test_hotbackup_after_upgrade_impl() + + @step + def test_hotbackup_impl(self): + """test hotbackup feature: general implementation""" + self.backup_name = self.create_backup("thy_name_is_" + self.name) + self.validate_local_backup(self.backup_name) + self.tcp_ping_all_nodes() + self.create_non_backup_data() + taken_backups = self.list_backup() + backup_no = len(taken_backups) - 1 + self.upload_backup(taken_backups[backup_no]) + self.tcp_ping_all_nodes() + self.delete_backup(taken_backups[backup_no]) + self.tcp_ping_all_nodes() + backups = self.list_backup() + if len(backups) != len(taken_backups) - 1: + raise Exception("expected backup to be gone, " "but its still there: " + str(backups)) + self.download_backup(self.backup_name) + self.validate_local_backup(self.backup_name) + self.tcp_ping_all_nodes() + backups = self.list_backup() + if backups[len(backups) - 1] != self.backup_name: + raise Exception("downloaded backup has different name? " + str(backups)) + self.before_backup() + self.restore_backup(backups[len(backups) - 1]) + self.tcp_ping_all_nodes() + self.after_backup() + time.sleep(20) # TODO fix + self.check_data_impl() + if not self.check_non_backup_data(): + raise Exception("data created after backup is still there??") + self.create_non_backup_data() + + @step + def test_hotbackup_after_upgrade_impl(self): + """test hotbackup after upgrade: general""" + self.check_data_impl() + backups = self.list_backup() + self.upload_backup(backups[0]) + self.tcp_ping_all_nodes() + self.delete_backup(backups[0]) + self.tcp_ping_all_nodes() + backups = self.list_backup() + if len(backups) != 0: + raise Exception("expected backup to be gone, " "but its still there: " + str(backups)) + self.download_backup(self.backup_name) + self.validate_local_backup(self.backup_name) + self.tcp_ping_all_nodes() + backups = self.list_backup() + if backups[0] != self.backup_name: + raise Exception("downloaded backup has different name? 
" + str(backups)) + time.sleep(20) # TODO fix + self.before_backup() + self.restore_backup(backups[0]) + self.tcp_ping_all_nodes() + self.after_backup() + if not self.check_non_backup_data(): + raise Exception("data created after backup is still there??") + self.delete_backup(backups[0]) + self.tcp_ping_all_nodes() + backups = self.list_backup() + if len(backups) != 0: + raise Exception("expected backup to be gone, " "but its still there: " + str(backups)) + def run_selenium(self): """fake to run the full lifecycle flow of this deployment""" @@ -424,12 +442,12 @@ def run_selenium(self): ) self.starter_prepare_env() self.finish_setup() # create the instances... - for starter in self.starter_instances: + for starter in self.get_running_starters(): # attach the PID of the starter instance: starter.attach_running_starter() # find out about its processes: starter.detect_instances() - print(self.starter_instances) + print(self.get_running_starters()) self.selenium.test_after_install() if self.new_installer: self.versionstr = "NEW[" + self.new_cfg.version + "] " @@ -499,7 +517,6 @@ def install(self, inst): sys_arangosh.js_version_check() # TODO: here we should invoke Makedata for the system installation. self.progress(True, "stop system service to make ports available for starter") - inst.stop_service() def get_selenium_status(self): @@ -649,7 +666,7 @@ def set_frontend_instances(self): def get_frontend_instances(self): """fetch all frontend instances""" frontends = [] - for starter in self.starter_instances: + for starter in self.get_running_starters(): if not starter.is_leader: continue for frontend in starter.get_frontends(): @@ -659,7 +676,7 @@ def get_frontend_instances(self): def get_frontend_starters(self): """fetch all frontend instances""" frontends = [] - for starter in self.starter_instances: + for starter in self.get_running_starters(): if not starter.is_leader: continue if len(starter.get_frontends()) > 0: @@ -669,7 +686,7 @@ def get_frontend_starters(self): @step def tcp_ping_all_nodes(self): """check whether all nodes react via tcp connection""" - for starter in self.starter_instances: + for starter in self.get_running_starters(): starter.tcp_ping_nodes() @step @@ -686,7 +703,7 @@ def print_frontend_instances(self): def print_all_instances_table(self): """print all http frontends to the user""" instances = [] - for starter in self.starter_instances: + for starter in self.get_running_starters(): instances += starter.get_instance_essentials() print_instances_table(instances) @@ -800,27 +817,28 @@ def dump_everything(self, name): assert starter.arango_dump, "dump everything: this starter doesn't have an dump instance!" 
@@ -846,9 +864,7 @@ def wait_for_self_heal(self, starter):
                 }
                 throw new Error("foxx routeing not ready on time!");
             };
             waitForSelfHeal();
-            """,
-            )
-        )
+            """))

     def restore_everything_from_dump(self, starter, path):
         """ do a full restore from a dump """
@@ -967,6 +983,12 @@ def delete_backup(self, name):
                 return starter.hb_instance.delete(name)
         raise Exception("no frontend found.")

+    @step
+    def delete_all_backups(self):
+        """delete all locally-stored backups"""
+        for backup in self.list_backup():
+            self.delete_backup(backup)
+
     def wait_for_restore_impl(self, backup_starter):
         """wait for all restores to be finished"""
         if self.hot_backup:
@@ -992,7 +1014,9 @@ def upload_backup(self, name, timeout=1200):
                 continue
             assert starter.hb_instance, "upload backup: this starter doesn't have an hb instance!"
             hb_id = starter.hb_instance.upload(name, starter.hb_config, "12345")
-            return starter.hb_instance.upload_status(name, hb_id, self.backup_instance_count, timeout=timeout)
+            starter.hb_instance.upload_status(name, hb_id, self.backup_instance_count, timeout=timeout)
+            self.uploaded_backups.append(name)
+            return
         raise Exception("no frontend found.")

     @step
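upload_backup now records every successfully uploaded name in self.uploaded_backups (initialised in __init__ above) instead of returning the upload status, which is what lets the cluster runner's test_hotbackup_after_upgrade_impl pick uploaded_backups[-1] after the local copies have been deleted. The contract in miniature (toy sketch, not the repo's classes):

    class BackupBook:
        def __init__(self):
            self.uploaded_backups = []

        def upload_backup(self, name):
            # ...perform the remote upload, then remember the name...
            self.uploaded_backups.append(name)

    book = BackupBook()
    book.upload_backup("thy_name_is_CLUSTER")
    book.upload_backup("thy_name_is_CLUSTER_plus1_server")
    assert book.uploaded_backups[-1] == "thy_name_is_CLUSTER_plus1_server"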
@@ -1009,10 +1033,29 @@ def download_backup(self, name, timeout=1200):

     def validate_local_backup(self, name):
         """validate the local backup"""
-        for starter in self.starter_instances:
+        for starter in self.get_running_starters():
             assert starter.hb_instance, "download backup: this starter doesn't have an hb instance!"
             starter.hb_instance.validate_local_backup(starter.basedir, name)

+    @step
+    def create_backup_and_upload(self, backup_name, delete_local=True):
+        """create a hotbackup, then upload and delete it"""
+        backup_name = self.create_backup(backup_name)
+        self.backup_name = backup_name
+        self.validate_local_backup(self.backup_name)
+        self.tcp_ping_all_nodes()
+        taken_backups = self.list_backup()
+        backup_no = len(taken_backups) - 1
+        self.upload_backup(taken_backups[backup_no])
+        self.tcp_ping_all_nodes()
+        if delete_local:
+            self.delete_backup(taken_backups[backup_no])
+            self.tcp_ping_all_nodes()
+            backups = self.list_backup()
+            if len(backups) != len(taken_backups) - 1:
+                raise Exception("expected backup to be gone, but it's still there: " + str(backups))
+        return backup_name
+
     @step
     def reload_routing(self):
         """reload the routing"""
@@ -1030,7 +1073,7 @@ def reload_routing(self):
     def search_for_warnings(self, print_lines=True):
         """search for any warnings in any logfiles and dump them to the screen"""
         ret = False
-        for starter in self.starter_instances:
+        for starter in self.get_running_starters():
             print("Ww" * 40)
             starter.search_for_warnings()
             for instance in starter.all_instances:
@@ -1072,7 +1115,7 @@ def zip_test_dir(self):
         if self.cfg.base_test_dir.exists():
             print("zipping test dir")
             if self.hot_backup:
-                for starter in self.starter_instances:
+                for starter in self.get_running_starters():
                     starter.cleanup_hotbackup_in_instance()
             # we just assume that we might have the "remote" directory in this subdir:
             backup_dir = self.basedir / "backup"
@@ -1116,7 +1159,7 @@ def cleanup(self, reset_tmp=True):

     def _set_logging(self, instance_type):
         """turn on logging for all of instance_type"""
-        for starter_mgr in self.starter_instances:
+        for starter_mgr in self.get_running_starters():
             starter_mgr.send_request(
                 instance_type,
                 requests.put,
@@ -1142,7 +1185,9 @@ def coordinator_set_debug_logging(self):
     @step
     def get_collection_list(self):
         """get a list of collections and their shards"""
-        reply = self.starter_instances[0].send_request(InstanceType.COORDINATOR, requests.get, "/_api/collection", None)
+        reply = self.get_running_starters()[0].send_request(
+            InstanceType.COORDINATOR, requests.get, "/_api/collection", None
+        )
         if reply[0].status_code != 200:
             raise Exception(
                 "get Collections: Unsupported return code" + str(reply[0].status_code) + " - " + str(reply[0].body)
@@ -1159,7 +1204,7 @@ def get_collection_list(self):

     def get_collection_cluster_details(self, collection_name):
         """get the shard details for a single collection"""
-        reply = self.starter_instances[0].send_request(
+        reply = self.get_running_starters()[0].send_request(
             InstanceType.COORDINATOR,
             requests.put,
             "/_db/_system/_admin/cluster/collectionShardDistribution",
@@ -1255,7 +1300,7 @@ def set_selenium_instances(self):
     def export_instance_info(self):
         """resemble the testing.js INSTANCEINFO env"""
         starter_structs = []
-        for starter in self.starter_instances:
+        for starter in self.get_running_starters():
             starter_structs.append(starter.get_structure())
         struct = starter_structs[0]
         for starter in starter_structs[1:]:
@@ -1270,7 +1315,7 @@ def remove_server_from_agency(self, server_uuid, deadline=150):
         body = '{"server": "%s"}' % server_uuid
         deadline = datetime.now() + timedelta(seconds=deadline)
         while datetime.now() < deadline:
-            reply = self.starter_instances[0].send_request(
+            reply = self.get_running_starters()[0].send_request(
                 InstanceType.COORDINATOR,
                 requests.post,
                 "/_admin/cluster/removeServer",
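remove_server_from_agency posts to ArangoDB's /_admin/cluster/removeServer endpoint on a coordinator, retrying until the removal is accepted or the deadline passes; the cluster hotbackup test calls it right after stopping a DB server. Stand-alone, the same call looks roughly like this (sketch: authentication and the exact success codes are simplified):

    import time
    from datetime import datetime, timedelta

    import requests

    def remove_server(coordinator_url, server_uuid, auth, deadline_seconds=150):
        """Ask the cluster to drop a stopped DB server, retrying until accepted."""
        deadline = datetime.now() + timedelta(seconds=deadline_seconds)
        while datetime.now() < deadline:
            reply = requests.post(
                coordinator_url + "/_admin/cluster/removeServer",
                json={"server": server_uuid},  # same body the method above builds
                auth=auth,
            )
            if reply.status_code == 200:
                return
            time.sleep(5)
        raise TimeoutError("server " + server_uuid + " was not removed in time")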
@@ -1289,3 +1334,11 @@ def remove_server_from_agency(self, server_uuid, deadline=150):
     def makedata_databases(self):
         """return a list of databases that makedata tests must be ran in"""
         return [["_system", self.props.force_one_shard, 0]] + self.custom_databases.copy()
+
+    def get_running_starters(self):
+        """get list of running starters"""
+        return [starter for starter in self.starter_instances if starter.is_running]
+
+    def get_not_running_starters(self):
+        """get list of not running starters"""
+        return [starter for starter in self.starter_instances if not starter.is_running]
diff --git a/release_tester/arangodb/starter/deployments/single.py b/release_tester/arangodb/starter/deployments/single.py
index 5087b89d..fb8dc515 100644
--- a/release_tester/arangodb/starter/deployments/single.py
+++ b/release_tester/arangodb/starter/deployments/single.py
@@ -36,7 +36,9 @@ def __init__(
             runner_type,
             abort_on_error,
             installer_set,
-            RunnerProperties(rp, "Single", 400, 500, True, 1),
+            RunnerProperties(
+                rp, "Single", 400, 500, True, 1
+            ),
             selenium,
             selenium_driver_args,
             selenium_include_suites,
diff --git a/release_tester/arangodb/starter/manager.py b/release_tester/arangodb/starter/manager.py
index 237fcde9..2242676e 100644
--- a/release_tester/arangodb/starter/manager.py
+++ b/release_tester/arangodb/starter/manager.py
@@ -45,6 +45,7 @@
 IS_WINDOWS = sys.platform == "win32"

+DEFAULT_ENCRYPTION_AT_REST_KEY = "defaultencatrestkey_32chars_xxxx"

 # pylint: disable=too-many-lines disable=logging-fstring-interpolation
 class StarterManager:
@@ -122,7 +123,7 @@ def __init__(
         if self.cfg.encryption_at_rest:
             self.keyfile = self.basedir / "key.txt"
-            # generate pseudo random key of length 32:
-            self.keyfile.write_text((str(datetime.datetime.now()) * 5)[0:32])
+            self.keyfile.write_text(DEFAULT_ENCRYPTION_AT_REST_KEY)
             self.moreopts += ["--rocksdb.encryption-keyfile", str(self.keyfile)]
         self.hb_instance = None
         self.hb_config = None
@@ -312,25 +313,26 @@ def get_sync_masters(self):
     def get_frontend(self):
         """get the first frontendhost of this starter"""
         servers = self.get_frontends()
-        assert servers, "starter: don't have instances!"
+        print(repr(self))
+        assert servers, "starter: don't have instances!" + repr(self)
         return servers[0]

     def get_dbserver(self):
         """get the first dbserver of this starter"""
         servers = self.get_dbservers()
-        assert servers, "starter: don't have instances!"
+        assert servers, "starter: don't have instances!" + repr(self)
         return servers[0]

     def get_agent(self):
         """get the first agent of this starter"""
         servers = self.get_agents()
-        assert servers, "starter: have no instances!"
+        assert servers, "starter: have no instances!" + repr(self)
         return servers[0]

     def get_sync_master(self):
         """get the first arangosync master of this starter"""
         servers = self.get_sync_masters()
-        assert servers, "starter: don't have instances!"
+        assert servers, "starter: don't have instances!" + repr(self)
         return servers[0]

     def have_this_instance(self, instance):
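The fixed DEFAULT_ENCRYPTION_AT_REST_KEY makes encrypted test runs reproducible; the replaced code derived a pseudo-random 32-character key from the current timestamp. If randomness were ever wanted again, the standard library produces a proper 32-character key in one line (alternative sketch, not what this patch does):

    import secrets

    # 16 random bytes, hex-encoded: a 32-character key like the one written to key.txt
    keyfile_content = secrets.token_hex(16)
    assert len(keyfile_content) == 32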
@@ -380,6 +382,22 @@ def run_starter(self, expect_to_fail=False):
         if not expect_to_fail:
             self.wait_for_logfile()
             self.wait_for_port_bind()
+        self.is_running = True
+
+    @step
+    def run_starter_and_wait(self):
+        """launch the starter and wait for all arangod instances to come up"""
+        self.run_starter()
+        count = 0
+        while not self.is_instance_up():
+            logging.debug("waiting for manager with logfile:" + str(self.log_file))
+            progress(".")
+            time.sleep(1)
+            count += 1
+            if count > 120:
+                raise Exception("Starter manager installation didn't come up in two minutes!")
+        self.detect_instances()
+        self.detect_instance_pids()

     @step
     def attach_running_starter(self):
@@ -449,7 +467,8 @@ def set_passvoid(self, passvoid, write_to_server=True):
             self.arangosh.js_set_passvoid("root", passvoid)
             self.passvoidfile.write_text(passvoid, encoding="utf-8")
         else:
-            self.arangosh.cfg.passvoid = passvoid
+            if self.arangosh:
+                self.arangosh.cfg.passvoid = passvoid
             self.passvoidfile.write_text(passvoid, encoding="utf-8")
         self.passvoid = passvoid
         for i in self.all_instances:
@@ -683,7 +702,7 @@ def replace_binary_for_upgrade(self, new_install_cfg, relaunch=True):
         self.replace_binary_setup_for_upgrade(new_install_cfg)
         with step("kill the starter processes of the old version"):
             if self.instance is None:
-                logging.error("StarterManager: don't have an instance!!")
+                logging.error("StarterManager: don't have an instance!!" + repr(self))
             else:
                 logging.info("StarterManager: Killing my instance [%s]", str(self.instance.pid))
                 self.kill_instance()
@@ -978,14 +997,14 @@ def get_log_file(self):
     def read_db_logfile(self):
         """get the logfile of the dbserver instance"""
         server = self.get_dbserver()
-        assert server.logfile.exists(), "don't have logfile?"
+        assert server.logfile.exists(), "don't have logfile?" + repr(self)
         return server.logfile.read_text(errors="backslashreplace")

     @step
     def read_agent_logfile(self):
         """get the agent logfile of this instance"""
         server = self.get_agent()
-        assert server.logfile.exists(), "don't have logfile?"
+        assert server.logfile.exists(), "don't have logfile?" + repr(self)
         return server.logfile.read_text(errors="backslashreplace")

     @step
@@ -1296,6 +1315,15 @@ def count_occurances_in_starter_log(self, substring: str):
         number_of_occurances = self.get_log_file().count(substring)
         return number_of_occurances

+    def stop_dbserver(self):
+        """stop db server managed by this starter"""
+        dbserver = self.get_dbserver()
+        self.kill_instance()
+        dbserver.terminate_instance()
+        self.all_instances.remove(dbserver)
+        self.moreopts.append("--cluster.start-dbserver=false")
+        self.run_starter()
+

 class StarterNonManager(StarterManager):
     """this class is a dummy starter manager to work with similar interface"""
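run_starter_and_wait repeats the poll-until-up loop that cluster.py's starter_run_impl also carries; if this duplication grows, a small shared helper would keep the two-minute deadline in one place (sketch with assumed names, not part of this patch):

    import time

    def wait_until(predicate, deadline_seconds=120, interval=1.0):
        """Poll predicate() until it returns True or the deadline expires."""
        waited = 0.0
        while not predicate():
            time.sleep(interval)
            waited += interval
            if waited > deadline_seconds:
                raise TimeoutError(f"condition not met within {deadline_seconds}s")

    # usage: wait_until(starter.is_instance_up) instead of the hand-rolled count loop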