From 941cba4298d5860ba1b67410b7c80f82794ab0b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 19 Mar 2025 13:01:59 +0100
Subject: [PATCH 01/56] chore: libfranka version bump

---
 .gitignore     | 2 +-
 CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index fff58551..583db86e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@ __pycache__
 .ruff_cache
 .mypy_cache
 dist
-.venv
+.venv*
 config.yaml
 MUJOCO_LOG.TXT
 src/pybind/mujoco
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c8896ff..f5156447 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,7 +53,7 @@ find_package(MuJoCo REQUIRED)
 FetchContent_Declare(
     libfranka
     GIT_REPOSITORY https://github.com/frankaemika/libfranka.git
-    GIT_TAG 0.14.2
+    GIT_TAG 0.15.0
     GIT_PROGRESS TRUE
     EXCLUDE_FROM_ALL
 )

From 2f916e0b3720dec25b518c7daf4fa9e9c3979e47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 20 Mar 2025 16:26:41 +0100
Subject: [PATCH 02/56] feat: added first osc implementation

---
 python/rcsss/_core/hw/__init__.pyi      |   6 +
 python/rcsss/control/fr3_desk.py        |   6 +
 src/common/LinearPoseTrajInterpolator.h | 100 +++++
 src/hw/FR3.cpp                          | 575 ++++++++++++++++++++++++
 src/hw/FR3.h                            |  22 +
 src/pybind/rcsss.cpp                    |   4 +
 6 files changed, 713 insertions(+)
 create mode 100644 src/common/LinearPoseTrajInterpolator.h

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index b3423d24..daeca1cb 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -52,6 +52,10 @@ class FR3(rcsss._core.common.Robot):
     def double_tap_robot_to_continue(self) -> None: ...
     def get_parameters(self) -> FR3Config: ...
     def get_state(self) -> FR3State: ...
+    def osc2_set_cartesian_position(self, desired_pose_EE_in_base_frame: rcsss._core.common.Pose) -> None: ...
+    def osc_set_cartesian_position(
+        self, desired_pos_EE_in_base_frame: numpy.ndarray[typing.Literal[3], numpy.dtype[numpy.float64]]
+    ) -> None: ...
     def set_cartesian_position_ik(
         self, pose: rcsss._core.common.Pose, max_time: float, elbow: float | None, max_force: float | None = 5
     ) -> None: ...
@@ -59,6 +63,8 @@ class FR3(rcsss._core.common.Robot):
     def set_default_robot_behavior(self) -> None: ...
     def set_guiding_mode(self, enabled: bool) -> None: ...
     def set_parameters(self, cfg: FR3Config) -> bool: ...
+    def stop_control_thread(self) -> None: ...
+    def zero_torque(self) -> None: ...
 
 class FR3Config(rcsss._core.common.RConfig):
     controller: IKController
diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index f35797d9..d20aa73a 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -161,6 +161,11 @@ def __init__(self, hostname: str, username: str, password: str) -> None:
             "down": False,
             "up": False,
         }
+        # Create an SSLContext that doesn't verify certificates
+        self.ssl_context = ssl.create_default_context()
+        self.ssl_context.check_hostname = False  # Disable hostname verification
+        self.ssl_context.verify_mode = ssl.CERT_NONE  # Disable certificate verification
+
         self.login()
 
     def __enter__(self) -> "Desk":
@@ -322,6 +327,7 @@ def take_control(self, force: bool = False) -> bool:
                 f"wss://{self._hostname}/desk/api/navigation/events",
                 server_hostname="robot.franka.de",
                 additional_headers={"authorization": self._session.cookies.get("authorization")},  # type: ignore[arg-type]
+                ssl_context=self.ssl_context,
             ) as websocket:
                 while True:
                     event: dict = json_module.loads(websocket.recv(timeout))
diff --git a/src/common/LinearPoseTrajInterpolator.h b/src/common/LinearPoseTrajInterpolator.h
new file mode 100644
index 00000000..96b9406d
--- /dev/null
+++ b/src/common/LinearPoseTrajInterpolator.h
@@ -0,0 +1,100 @@
+#ifndef DEOXYS_FRANKA_INTERFACE_INCLUDE_UTILS_TRAJ_INTERPOLATORS_POSE_TRAJ_INTERPOLATOR_H_
+#define DEOXYS_FRANKA_INTERFACE_INCLUDE_UTILS_TRAJ_INTERPOLATORS_POSE_TRAJ_INTERPOLATOR_H_
+#include <Eigen/Dense>
+
+namespace rcs {
+namespace common {
+
+class LinearPoseTrajInterpolator {
+ private:
+  Eigen::Vector3d p_start_;
+  Eigen::Vector3d p_goal_;
+  Eigen::Vector3d last_p_t_;
+  Eigen::Vector3d prev_p_goal_;
+
+  Eigen::Quaterniond q_start_;
+  Eigen::Quaterniond q_goal_;
+  Eigen::Quaterniond last_q_t_;
+  Eigen::Quaterniond prev_q_goal_;
+
+  double dt_;
+  double last_time_;
+  double max_time_;
+  double start_time_;
+  bool start_;
+  bool first_goal_;
+
+ public:
+  inline LinearPoseTrajInterpolator()
+      : dt_(0.),
+        last_time_(0.),
+        max_time_(1.),
+        start_time_(0.),
+        start_(false),
+        first_goal_(true) {};
+
+  inline ~LinearPoseTrajInterpolator() {};
+
+  inline void Reset(const double &time_sec, const Eigen::Vector3d &p_start,
+                    const Eigen::Quaterniond &q_start,
+                    const Eigen::Vector3d &p_goal,
+                    const Eigen::Quaterniond &q_goal, const int &policy_rate,
+                    const int &rate,
+                    const double &traj_interpolator_time_fraction) {
+    dt_ = 1. / static_cast<double>(rate);
+    last_time_ = time_sec;
+    max_time_ =
+        1. / static_cast<double>(policy_rate) * traj_interpolator_time_fraction;
+    start_time_ = time_sec;
+
+    start_ = false;
+
+    if (first_goal_) {
+      p_start_ = p_start;
+      q_start_ = q_start;
+
+      prev_p_goal_ = p_start;
+      prev_q_goal_ = q_start;
+      first_goal_ = false;
+    } else {
+      // If the goal is already set, use prev goal as the starting point of
+      // interpolation.
+      prev_p_goal_ = p_goal_;
+      prev_q_goal_ = q_goal_;
+
+      p_start_ = prev_p_goal_;
+      q_start_ = prev_q_goal_;
+    }
+
+    p_goal_ = p_goal;
+    q_goal_ = q_goal;
+
+    // Flip the sign if the dot product of quaternions is negative
+    if (q_goal_.coeffs().dot(q_start_.coeffs()) < 0.0) {
+      q_start_.coeffs() << -q_start_.coeffs();
+    }
+  };
+
+  inline void GetNextStep(const double &time_sec, Eigen::Vector3d &p_t,
+                          Eigen::Quaterniond &q_t) {
+    if (!start_) {
+      start_time_ = time_sec;
+      last_p_t_ = p_start_;
+      last_q_t_ = q_start_;
+      start_ = true;
+    }
+
+    if (last_time_ + dt_ <= time_sec) {
+      double t =
+          std::min(std::max((time_sec - start_time_) / max_time_, 0.), 1.);
+      last_p_t_ = p_start_ + t * (p_goal_ - p_start_);
+      last_q_t_ = q_start_.slerp(t, q_goal_);
+      last_time_ = time_sec;
+    }
+    p_t = last_p_t_;
+    q_t = last_q_t_;
+  };
+};
+}  // namespace common
+}  // namespace rcs
+#endif  // DEOXYS_FRANKA_INTERFACE_INCLUDE_UTILS_TRAJ_INTERPOLATORS_POSE_TRAJ_INTERPOLATOR_H_
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index d746c7a7..bfa7f09a 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -1,5 +1,9 @@
 #include "FR3.h"
 
+#include <franka/duration.h>
+#include <franka/exception.h>
+#include <franka/model.h>
+#include <franka/rate_limiting.h>
 #include <franka/robot.h>
 
 #include <Eigen/Core>
@@ -106,6 +110,577 @@ void FR3::set_guiding_mode(bool enabled) {
   this->cfg.guiding_mode_enabled = enabled;
 }
 
+void PInverse(const Eigen::MatrixXd &M, Eigen::MatrixXd &M_inv,
+              double epsilon = 0.00025) {
+  Eigen::JacobiSVD<Eigen::MatrixXd> svd(
+      M, Eigen::ComputeFullU | Eigen::ComputeFullV);
+  Eigen::JacobiSVD<Eigen::MatrixXd>::SingularValuesType singular_vals =
+      svd.singularValues();
+
+  Eigen::MatrixXd S_inv = M;
+  S_inv.setZero();
+  for (int i = 0; i < singular_vals.size(); i++) {
+    if (singular_vals(i) < epsilon) {
+      S_inv(i, i) = 0.;
+    } else {
+      S_inv(i, i) = 1. / singular_vals(i);
+    }
+  }
+  M_inv = Eigen::MatrixXd(svd.matrixV() * S_inv * svd.matrixU().transpose());
+}
+
+void TorqueSafetyGuardFn(std::array<double, 7> &tau_d_array, double min_torque,
+                         double max_torque) {
+  for (size_t i = 0; i < tau_d_array.size(); i++) {
+    if (tau_d_array[i] < min_torque) {
+      tau_d_array[i] = min_torque;
+    } else if (tau_d_array[i] > max_torque) {
+      tau_d_array[i] = max_torque;
+    }
+  }
+}
+
+void FR3::osc2_set_cartesian_position(
+    const common::Pose &desired_pose_EE_in_base_frame) {
+
+  // auto pose = this->get_cartesian_position();
+
+  // from deoxys/config/osc-position-controller.yml
+  double traj_interpolation_time_fraction = 0.3;
+  // form deoxys/config/charmander.yml
+  int policy_rate = 20;
+  int traj_rate = 500;
+
+  if (!this->control_thread_running) {
+    this->controller_time = 0.0;
+    this->curr_pose = this->get_cartesian_position();
+  } else {
+    this->traj_interpolator_mutex.lock();
+  }
+
+  this->traj_interpolator.Reset(
+      this->controller_time, this->curr_pose.translation(), this->curr_pose.quaternion(),
+      desired_pose_EE_in_base_frame.translation(), desired_pose_EE_in_base_frame.quaternion(),
+      policy_rate, traj_rate, traj_interpolation_time_fraction);
+
+  // if not thread is running, then start
+  if (!this->control_thread_running) {
+    this->control_thread_running = true;
+    this->control_thread = std::thread(&FR3::osc2, this);
+  } else {
+    this->traj_interpolator_mutex.unlock();
+  }
+}
+
+void FR3::osc_set_cartesian_position(
+    const Eigen::Vector3d &desired_pos_EE_in_base_frame) {
+  // this->osc_desired_pos_EE_in_base_frame_mutex.lock();
+  // this->osc_desired_pos_EE_in_base_frame = desired_pos_EE_in_base_frame;
+  // this->osc_desired_pos_EE_in_base_frame_mutex.unlock();
+
+  Eigen::Quaterniond fixed_desired_quat_EE_in_base_frame(0., 1., 0., 0.);
+
+  // from deoxys/config/osc-position-controller.yml
+  double traj_interpolation_time_fraction = 0.3;
+  // form deoxys/config/charmander.yml
+  int policy_rate = 20;
+  int traj_rate = 500;
+
+  if (!this->control_thread_running) {
+    this->controller_time = 0.0;
+    // auto pose = this->get_cartesian_position();
+    this->curr_pose = this->get_cartesian_position();
+  } else {
+    this->traj_interpolator_mutex.lock();
+  }
+
+  this->traj_interpolator.Reset(
+      this->controller_time, this->curr_pose.translation(), this->curr_pose.quaternion(),
+      desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
+      policy_rate, traj_rate, traj_interpolation_time_fraction);
+
+  // if not thread is running, then start
+  if (!this->control_thread_running) {
+    this->control_thread_running = true;
+    this->control_thread = std::thread(&FR3::osc, this);
+  } else {
+    this->traj_interpolator_mutex.unlock();
+  }
+}
+
+// method to stop thread
+void FR3::stop_control_thread() {
+  if (this->control_thread.has_value() && this->control_thread_running) {
+    this->control_thread_running = false;
+    this->control_thread->join();
+    this->control_thread.reset();
+    // this->osc_desired_pos_EE_in_base_frame.reset();
+  }
+}
+
+void FR3::osc() {
+  franka::Model model = this->robot.loadModel();
+
+  this->controller_time = 0.0;
+
+  this->robot.setCollisionBehavior(
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+
+  // from bench mark
+  // ([150.0, 150.0, 60.0], 250.0), // kp_translation, kp_rotation
+  // ([60.0, 150.0, 150.0], 250.0), // kd_translation, kd_rotation
+
+  // from config file
+  // Kp:
+  // translation: [150.0, 150.0, 150.0]
+  // rotation: 250.0
+
+  Eigen::Matrix<double, 3, 3> Kp_p, Kp_r, Kd_p, Kd_r;
+  Eigen::Matrix<double, 7, 1> static_q_task_;
+  // Eigen::Vector3d prev_goal_pos_EE_in_base_frame;
+  Eigen::Matrix<double, 7, 1> residual_mass_vec_;
+
+  // values from deoxys/config/osc-position-controller.yml
+  Kp_p.diagonal() << 150, 150, 150;
+  Kp_r.diagonal() << 250, 250, 250;
+
+  Kd_p << Kp_p.cwiseSqrt() * 2.0;
+  Kd_r << Kp_r.cwiseSqrt() * 2.0;
+
+  static_q_task_ << 0.09017809387254755, -0.9824203501652151,
+      0.030509718397568178, -2.694229634937343, 0.057700675144720104,
+      1.860298714876101, 0.8713759453244422;
+
+  // The manual residual mass matrix to add on the internal mass matrix
+  residual_mass_vec_ << 0.0, 0.0, 0.0, 0.0, 0.1, 0.5, 0.5;
+
+  this->robot.control([&](const franka::RobotState &robot_state,
+                          franka::Duration period) -> franka::Torques {
+    std::chrono::high_resolution_clock::time_point t1 =
+        std::chrono::high_resolution_clock::now();
+
+    // torques handler
+    if (!this->control_thread_running) {
+      franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
+      return franka::MotionFinished(zero_torques);
+    }
+    // TO BE replaced
+    // if (!this->control_thread_running && dq.maxCoeff() < 0.0001) {
+    //   return franka::MotionFinished(franka::Torques(tau_d_array));
+    // }
+
+    Eigen::Vector3d desired_pos_EE_in_base_frame;
+    Eigen::Quaterniond desired_quat_EE_in_base_frame;
+
+    common::Pose pose(robot_state.O_T_EE);
+    // auto pose = this->get_cartesian_position();
+    // from deoxys/config/osc-position-controller.yml
+    double traj_interpolation_time_fraction = 0.3;
+    // form deoxys/config/charmander.yml
+    int policy_rate = 20;
+    int traj_rate = 500;
+
+    // w, x, y, z
+    // TODO: change this non fixed?
+    Eigen::Quaterniond fixed_desired_quat_EE_in_base_frame(0., 1., 0., 0.);
+
+    this->traj_interpolator_mutex.lock();
+    // if (this->controller_time == 0) {
+    //   this->traj_interpolator.Reset(
+    //       0., pose.translation(), pose.quaternion(),
+    //       desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
+    //       policy_rate, traj_rate, traj_interpolation_time_fraction);
+    // }
+    this->curr_pose = common::Pose(robot_state.O_T_EE);
+    this->controller_time += period.toSec();
+    this->traj_interpolator.GetNextStep(this->controller_time,
+                                        desired_pos_EE_in_base_frame,
+                                        desired_quat_EE_in_base_frame);
+    this->traj_interpolator_mutex.unlock();
+
+    // end torques handler
+
+    Eigen::Matrix<double, 7, 1> tau_d;
+
+    std::array<double, 49> mass_array = model.mass(robot_state);
+
+    Eigen::Map<Eigen::Matrix<double, 7, 7>> M(mass_array.data());
+
+    M = M + Eigen::Matrix<double, 7, 7>(residual_mass_vec_.asDiagonal());
+
+    // coriolis and gravity
+    std::array<double, 7> coriolis_array = model.coriolis(robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> coriolis(
+        coriolis_array.data());
+
+    std::array<double, 7> gravity_array = model.gravity(robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> gravity(gravity_array.data());
+
+    std::array<double, 42> jacobian_array =
+        model.zeroJacobian(franka::Frame::kEndEffector, robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 6, 7>> jacobian(
+        jacobian_array.data());
+
+    Eigen::MatrixXd jacobian_pos(3, 7);
+    Eigen::MatrixXd jacobian_ori(3, 7);
+    jacobian_pos << jacobian.block(0, 0, 3, 7);
+    jacobian_ori << jacobian.block(3, 0, 3, 7);
+
+    // End effector pose in base frame
+    Eigen::Affine3d T_EE_in_base_frame(
+        Eigen::Matrix4d::Map(robot_state.O_T_EE.data()));
+    Eigen::Vector3d pos_EE_in_base_frame(T_EE_in_base_frame.translation());
+    Eigen::Quaterniond quat_EE_in_base_frame(T_EE_in_base_frame.linear());
+
+    // Joint velocity
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
+
+    // Nullspace goal
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> q(robot_state.q.data());
+
+    // Update the specified state estimator
+    // if (this->state_estimator_ptr_->IsFirstState()) {
+    //   this->state_estimator_ptr_->Initialize(q, dq, pos_EE_in_base_frame,
+    //                                          quat_EE_in_base_frame);
+    // } else {
+    //   this->state_estimator_ptr_->Update(q, dq, pos_EE_in_base_frame,
+    //                                      quat_EE_in_base_frame);
+    // }
+
+    Eigen::Matrix<double, 7, 1> current_q, current_dq;
+    // Get state from a specified state estimator
+    // current_q = this->state_estimator_ptr_->GetCurrentJointPos();
+    // current_dq = this->state_estimator_ptr_->GetCurrentJointVel();
+    current_q = q;    // this->state_estimator_ptr_->GetCurrentJointPos();
+    current_dq = dq;  // this->state_estimator_ptr_->GetCurrentJointVel();
+
+    // Get eef states from a specified state estimator
+    // pos_EE_in_base_frame = this->state_estimator_ptr_->GetCurrentEEFPos();
+    // quat_EE_in_base_frame = this->state_estimator_ptr_->GetCurrentEEFQuat();
+
+    if (fixed_desired_quat_EE_in_base_frame.coeffs().dot(
+            quat_EE_in_base_frame.coeffs()) < 0.0) {
+      quat_EE_in_base_frame.coeffs() << -quat_EE_in_base_frame.coeffs();
+    }
+
+    Eigen::Vector3d pos_error;
+
+    pos_error << desired_pos_EE_in_base_frame - pos_EE_in_base_frame;
+
+    Eigen::Quaterniond quat_error(
+        fixed_desired_quat_EE_in_base_frame.inverse() * quat_EE_in_base_frame);
+    Eigen::Vector3d ori_error;
+    ori_error << quat_error.x(), quat_error.y(), quat_error.z();
+    ori_error << -T_EE_in_base_frame.linear() * ori_error;
+
+    // Compute matrices
+    Eigen::Matrix<double, 7, 7> M_inv(M.inverse());
+    Eigen::MatrixXd Lambda_inv(6, 6);
+    Lambda_inv << jacobian * M_inv * jacobian.transpose();
+    Eigen::MatrixXd Lambda(6, 6);
+    PInverse(Lambda_inv, Lambda);
+
+    Eigen::Matrix<double, 7, 6> J_inv;
+    J_inv << M_inv * jacobian.transpose() * Lambda;
+    Eigen::Matrix<double, 7, 7> Nullspace;
+    Nullspace << Eigen::MatrixXd::Identity(7, 7) -
+                     jacobian.transpose() * J_inv.transpose();
+
+    // Decoupled mass matrices
+    Eigen::MatrixXd Lambda_pos_inv(3, 3);
+    Lambda_pos_inv << jacobian_pos * M_inv * jacobian_pos.transpose();
+    Eigen::MatrixXd Lambda_ori_inv(3, 3);
+    Lambda_ori_inv << jacobian_ori * M_inv * jacobian_ori.transpose();
+
+    Eigen::MatrixXd Lambda_pos(3, 3);
+    Eigen::MatrixXd Lambda_ori(3, 3);
+    PInverse(Lambda_pos_inv, Lambda_pos);
+    PInverse(Lambda_ori_inv, Lambda_ori);
+
+    pos_error =
+        pos_error.unaryExpr([](double x) { return (abs(x) < 1e-4) ? 0. : x; });
+    ori_error =
+        ori_error.unaryExpr([](double x) { return (abs(x) < 5e-3) ? 0. : x; });
+
+    tau_d << jacobian_pos.transpose() *
+                     (Lambda_pos *
+                      (Kp_p * pos_error - Kd_p * (jacobian_pos * current_dq))) +
+                 jacobian_ori.transpose() *
+                     (Lambda_ori *
+                      (Kp_r * ori_error - Kd_r * (jacobian_ori * current_dq)));
+
+    // compensation - Try low gain with compensation?
+    // tau_d << tau_d + coriolis;
+    // tau_d << tau_d + gravity;
+    // nullspace control
+    tau_d << tau_d + Nullspace * (static_q_task_ - current_q);
+    // std::cout << "Nullspace : " << (Nullspace * (static_q_task_ -
+    // q)).transpose() << std::endl;
+
+    std::array<double, 7> tau_d_array{};
+    Eigen::VectorXd::Map(&tau_d_array[0], 7) = tau_d;
+
+    std::chrono::high_resolution_clock::time_point t2 =
+        std::chrono::high_resolution_clock::now();
+    auto time = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
+    // std::cout << "OSC step took: " << time.count() << " ms"  << std::endl;
+
+    std::array<double, 7> tau_d_rate_limited = franka::limitRate(
+        franka::kMaxTorqueRate, tau_d_array, robot_state.tau_J_d);
+
+    // deoxys/config/control_config.yml
+    double min_torque = -5;
+    double max_torque = 5;
+    TorqueSafetyGuardFn(tau_d_rate_limited, min_torque, max_torque);
+
+    return tau_d_rate_limited;
+  });
+}
+
+void FR3::osc2() {
+  franka::Model model = this->robot.loadModel();
+
+  this->controller_time = 0.0;
+
+  this->robot.setCollisionBehavior(
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+
+  // from bench mark
+  // ([150.0, 150.0, 60.0], 250.0), // kp_translation, kp_rotation
+  // ([60.0, 150.0, 150.0], 250.0), // kd_translation, kd_rotation
+
+  // from config file
+  // Kp:
+  // translation: [150.0, 150.0, 150.0]
+  // rotation: 250.0
+
+  Eigen::Matrix<double, 3, 3> Kp_p, Kp_r, Kd_p, Kd_r;
+  Eigen::Matrix<double, 7, 1> static_q_task_;
+  Eigen::Matrix<double, 7, 1> residual_mass_vec_;
+  Eigen::Array<double, 7, 1> joint_max_;
+  Eigen::Array<double, 7, 1> joint_min_;
+  Eigen::Array<double, 7, 1> avoidance_weights_;
+
+  // values from deoxys/config/osc-position-controller.yml
+  Kp_p.diagonal() << 150, 150, 150;
+  Kp_r.diagonal() << 250, 250, 250;
+
+  Kd_p << Kp_p.cwiseSqrt() * 2.0;
+  Kd_r << Kp_r.cwiseSqrt() * 2.0;
+
+  static_q_task_ << 0.09017809387254755, -0.9824203501652151,
+      0.030509718397568178, -2.694229634937343, 0.057700675144720104,
+      1.860298714876101, 0.8713759453244422;
+
+  // The manual residual mass matrix to add on the internal mass matrix
+  residual_mass_vec_ << 0.0, 0.0, 0.0, 0.0, 0.1, 0.5, 0.5;
+
+
+  joint_max_ << 2.8978, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973;
+  joint_min_ << -2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973;
+  avoidance_weights_ << 1., 1., 1., 1., 1., 10., 10.;
+
+  this->robot.control([&](const franka::RobotState &robot_state,
+                          franka::Duration period) -> franka::Torques {
+    std::chrono::high_resolution_clock::time_point t1 =
+        std::chrono::high_resolution_clock::now();
+
+    // torques handler
+    if (!this->control_thread_running) {
+      franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
+      return franka::MotionFinished(zero_torques);
+    }
+    // TO BE replaced
+    // if (!this->control_thread_running && dq.maxCoeff() < 0.0001) {
+    //   return franka::MotionFinished(franka::Torques(tau_d_array));
+    // }
+
+    Eigen::Vector3d desired_pos_EE_in_base_frame;
+    Eigen::Quaterniond desired_quat_EE_in_base_frame;
+
+    common::Pose pose(robot_state.O_T_EE);
+    // auto pose = this->get_cartesian_position();
+    // from deoxys/config/osc-position-controller.yml
+    double traj_interpolation_time_fraction = 0.3;
+    // form deoxys/config/charmander.yml
+    int policy_rate = 20;
+    int traj_rate = 500;
+
+
+    this->traj_interpolator_mutex.lock();
+    // if (this->controller_time == 0) {
+    //   this->traj_interpolator.Reset(
+    //       0., pose.translation(), pose.quaternion(),
+    //       desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
+    //       policy_rate, traj_rate, traj_interpolation_time_fraction);
+    // }
+    this->controller_time += period.toSec();
+    this->traj_interpolator.GetNextStep(this->controller_time,
+                                        desired_pos_EE_in_base_frame,
+                                        desired_quat_EE_in_base_frame);
+    this->traj_interpolator_mutex.unlock();
+
+    // end torques handler
+
+    Eigen::Matrix<double, 7, 1> tau_d;
+
+    std::array<double, 49> mass_array = model.mass(robot_state);
+    Eigen::Map<Eigen::Matrix<double, 7, 7>> M(mass_array.data());
+
+    M = M + Eigen::Matrix<double, 7, 7>(residual_mass_vec_.asDiagonal());
+
+    // coriolis and gravity
+    std::array<double, 7> coriolis_array = model.coriolis(robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> coriolis(
+        coriolis_array.data());
+
+    std::array<double, 7> gravity_array = model.gravity(robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> gravity(gravity_array.data());
+
+    std::array<double, 42> jacobian_array =
+        model.zeroJacobian(franka::Frame::kEndEffector, robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 6, 7>> jacobian(
+        jacobian_array.data());
+
+    Eigen::MatrixXd jacobian_pos(3, 7);
+    Eigen::MatrixXd jacobian_ori(3, 7);
+    jacobian_pos << jacobian.block(0, 0, 3, 7);
+    jacobian_ori << jacobian.block(3, 0, 3, 7);
+
+    // End effector pose in base frame
+    Eigen::Affine3d T_EE_in_base_frame(
+        Eigen::Matrix4d::Map(robot_state.O_T_EE.data()));
+    Eigen::Vector3d pos_EE_in_base_frame(T_EE_in_base_frame.translation());
+    Eigen::Quaterniond quat_EE_in_base_frame(T_EE_in_base_frame.linear());
+
+    // Nullspace goal
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> q(robot_state.q.data());
+
+    // Joint velocity
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
+
+
+    if (desired_quat_EE_in_base_frame.coeffs().dot(
+            quat_EE_in_base_frame.coeffs()) < 0.0) {
+      quat_EE_in_base_frame.coeffs() << -quat_EE_in_base_frame.coeffs();
+    }
+
+    Eigen::Vector3d pos_error;
+
+    pos_error << desired_pos_EE_in_base_frame - pos_EE_in_base_frame;
+    Eigen::Quaterniond quat_error(desired_quat_EE_in_base_frame.inverse() *
+                                  quat_EE_in_base_frame);
+    Eigen::Vector3d ori_error;
+    ori_error << quat_error.x(), quat_error.y(), quat_error.z();
+    ori_error << -T_EE_in_base_frame.linear() * ori_error;
+
+    // Compute matrices
+    Eigen::Matrix<double, 7, 7> M_inv(M.inverse());
+    Eigen::MatrixXd Lambda_inv(6, 6);
+    Lambda_inv << jacobian * M_inv * jacobian.transpose();
+    Eigen::MatrixXd Lambda(6, 6);
+    PInverse(Lambda_inv, Lambda);
+
+    Eigen::Matrix<double, 7, 6> J_inv;
+    J_inv << M_inv * jacobian.transpose() * Lambda;
+    Eigen::Matrix<double, 7, 7> Nullspace;
+    Nullspace << Eigen::MatrixXd::Identity(7, 7) -
+                     jacobian.transpose() * J_inv.transpose();
+
+    // Decoupled mass matrices
+    Eigen::MatrixXd Lambda_pos_inv(3, 3);
+    Lambda_pos_inv << jacobian_pos * M_inv * jacobian_pos.transpose();
+    Eigen::MatrixXd Lambda_ori_inv(3, 3);
+    Lambda_ori_inv << jacobian_ori * M_inv * jacobian_ori.transpose();
+
+    Eigen::MatrixXd Lambda_pos(3, 3);
+    Eigen::MatrixXd Lambda_ori(3, 3);
+    PInverse(Lambda_pos_inv, Lambda_pos);
+    PInverse(Lambda_ori_inv, Lambda_ori);
+
+    pos_error =
+        pos_error.unaryExpr([](double x) { return (abs(x) < 1e-4) ? 0. : x; });
+    ori_error =
+        ori_error.unaryExpr([](double x) { return (abs(x) < 5e-3) ? 0. : x; });
+
+    tau_d << jacobian_pos.transpose() *
+                     (Lambda_pos *
+                      (Kp_p * pos_error - Kd_p * (jacobian_pos * dq))) +
+                 jacobian_ori.transpose() *
+                     (Lambda_ori *
+                      (Kp_r * ori_error - Kd_r * (jacobian_ori * dq)));
+
+    // nullspace control
+    tau_d << tau_d + Nullspace * (static_q_task_ - q);
+
+    // Add joint avoidance potential
+    Eigen::Matrix<double, 7, 1> avoidance_force;
+    avoidance_force.setZero();
+    Eigen::Matrix<double, 7, 1> dist2joint_max;
+    Eigen::Matrix<double, 7, 1> dist2joint_min;
+
+    dist2joint_max = joint_max_.matrix() - q;
+    dist2joint_min = q - joint_min_.matrix();
+
+    for (int i = 0; i < 7; i++) {
+      if (dist2joint_max[i] < 0.25 && dist2joint_max[i] > 0.1)
+        avoidance_force[i] += -avoidance_weights_[i] * dist2joint_max[i];
+      if (dist2joint_min[i] < 0.25 && dist2joint_min[i] > 0.1)
+        avoidance_force[i] += avoidance_weights_[i] * dist2joint_min[i];
+    }
+    tau_d << tau_d + Nullspace * avoidance_force;
+    for (int i = 0; i < 7; i++) {
+      if (dist2joint_max[i] < 0.1 && tau_d[i] > 0.) tau_d[i] = 0.;
+      if (dist2joint_min[i] < 0.1 && tau_d[i] < 0.) tau_d[i] = 0.;
+    }
+
+    std::array<double, 7> tau_d_array{};
+    Eigen::VectorXd::Map(&tau_d_array[0], 7) = tau_d;
+
+    // end of controller
+    std::chrono::high_resolution_clock::time_point t2 =
+        std::chrono::high_resolution_clock::now();
+    auto time = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
+
+    std::array<double, 7> tau_d_rate_limited = franka::limitRate(
+        franka::kMaxTorqueRate, tau_d_array, robot_state.tau_J_d);
+
+    // deoxys/config/control_config.yml
+    double min_torque = -5;
+    double max_torque = 5;
+    TorqueSafetyGuardFn(tau_d_rate_limited, min_torque, max_torque);
+
+    return tau_d_rate_limited;
+  });
+}
+
+void FR3::zero_torque() {
+  // start thread
+
+  robot.setCollisionBehavior(
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+
+  double time = 0.0;
+  this->robot.control([&](const franka::RobotState &state,
+                          franka::Duration time_step) -> franka::Torques {
+    time += time_step.toSec();
+    if (time > 30) {
+      // stop
+      return franka::MotionFinished(franka::Torques({0, 0, 0, 0, 0, 0, 0}));
+    }
+    return franka::Torques({0, 0, 0, 0, 0, 0, 0});
+  });
+}
+
 void FR3::move_home() { this->set_joint_position(q_home); }
 
 void FR3::automatic_error_recovery() { this->robot.automaticErrorRecovery(); }
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index bba59083..198f1da2 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -5,12 +5,15 @@
 #include <common/Pose.h>
 #include <common/Robot.h>
 #include <common/utils.h>
+#include <common/LinearPoseTrajInterpolator.h>
 #include <franka/robot.h>
 
 #include <cmath>
 #include <memory>
 #include <optional>
 #include <string>
+#include <thread>
+#include <mutex>
 
 namespace rcs {
 namespace hw {
@@ -46,6 +49,15 @@ class FR3 : public common::Robot {
   franka::Robot robot;
   FR3Config cfg;
   std::optional<std::shared_ptr<common::IK>> m_ik;
+  std::optional<std::thread> control_thread = std::nullopt;
+  // std::optional<Eigen::Vector3d> osc_desired_pos_EE_in_base_frame = std::nullopt;
+  // std::mutex osc_desired_pos_EE_in_base_frame_mutex;
+  common::LinearPoseTrajInterpolator traj_interpolator;
+  std::mutex traj_interpolator_mutex;
+  double controller_time = 0.0;
+  common::Pose curr_pose = common::Pose::Identity();
+
+  bool control_thread_running = false;
 
  public:
   FR3(const std::string &ip,
@@ -69,6 +81,16 @@ class FR3 : public common::Robot {
 
   void set_guiding_mode(bool enabled);
 
+  void osc_set_cartesian_position(const Eigen::Vector3d &desired_pos_EE_in_base_frame);
+  void osc2_set_cartesian_position(const common::Pose &desired_pose_EE_in_base_frame);
+
+  void stop_control_thread();
+
+  void osc();
+  void osc2();
+
+  void zero_torque();
+
   void move_home() override;
 
   void automatic_error_recovery();
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 3753f738..ea8dee81 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -361,6 +361,10 @@ PYBIND11_MODULE(_core, m) {
            &rcs::hw::FR3::set_default_robot_behavior)
       .def("set_guiding_mode", &rcs::hw::FR3::set_guiding_mode,
            py::arg("enabled"))
+      .def("zero_torque", &rcs::hw::FR3::zero_torque)
+      .def("osc_set_cartesian_position", &rcs::hw::FR3::osc_set_cartesian_position, py::arg("desired_pos_EE_in_base_frame"))
+      .def("osc2_set_cartesian_position", &rcs::hw::FR3::osc2_set_cartesian_position, py::arg("desired_pose_EE_in_base_frame"))
+      .def("stop_control_thread", &rcs::hw::FR3::stop_control_thread)
       .def("automatic_error_recovery", &rcs::hw::FR3::automatic_error_recovery)
       .def("double_tap_robot_to_continue",
            &rcs::hw::FR3::double_tap_robot_to_continue)

From d812860e3ffc3a0b8f91f0853665e48427ee25ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 21 Mar 2025 13:34:52 +0100
Subject: [PATCH 03/56] added joint controller

---
 python/rcsss/_core/hw/__init__.pyi      |   3 +
 src/common/LinearPoseTrajInterpolator.h |  71 ++++++++++++
 src/hw/FR3.cpp                          | 146 ++++++++++++++++++++++--
 src/hw/FR3.h                            |   9 +-
 src/pybind/rcsss.cpp                    |   1 +
 5 files changed, 217 insertions(+), 13 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index daeca1cb..966be7a9 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -49,6 +49,9 @@ class FHState(rcsss._core.common.GState):
 class FR3(rcsss._core.common.Robot):
     def __init__(self, ip: str, ik: rcsss._core.common.IK | None = None) -> None: ...
     def automatic_error_recovery(self) -> None: ...
+    def controller_set_joint_position(
+        self, desired_q: numpy.ndarray[typing.Literal[7], numpy.dtype[numpy.float64]]
+    ) -> None: ...
     def double_tap_robot_to_continue(self) -> None: ...
     def get_parameters(self) -> FR3Config: ...
     def get_state(self) -> FR3State: ...
diff --git a/src/common/LinearPoseTrajInterpolator.h b/src/common/LinearPoseTrajInterpolator.h
index 96b9406d..03a3d0f8 100644
--- a/src/common/LinearPoseTrajInterpolator.h
+++ b/src/common/LinearPoseTrajInterpolator.h
@@ -95,6 +95,77 @@ class LinearPoseTrajInterpolator {
     q_t = last_q_t_;
   };
 };
+class LinearJointPositionTrajInterpolator {
+private:
+  using Vector7d = Eigen::Matrix<double, 7, 1>;
+  using Vector7i = Eigen::Matrix<int, 7, 1>;
+
+  Vector7d q_start_;
+  Vector7d q_goal_;
+
+  Vector7d last_q_t_;
+  Vector7d prev_q_goal_;
+
+  double dt_;
+  double last_time_;
+  double max_time_;
+  double start_time_;
+  bool start_;
+  bool first_goal_;
+
+  double interpolation_fraction_; // fraction of actual interpolation within an
+                                  // interval
+
+public:
+  inline LinearJointPositionTrajInterpolator()
+      : dt_(0.), last_time_(0.), max_time_(1.), start_time_(0.), start_(false),
+        first_goal_(true){};
+
+  inline ~LinearJointPositionTrajInterpolator(){};
+
+  inline void Reset(const double &time_sec,
+                    const Eigen::Matrix<double, 7, 1> &q_start,
+                    const Eigen::Matrix<double, 7, 1> &q_goal,
+                    const int &policy_rate, const int &rate,
+                    const double &traj_interpolator_time_fraction) {
+    dt_ = 1. / static_cast<double>(rate);
+    last_time_ = time_sec;
+
+    max_time_ =
+        1. / static_cast<double>(policy_rate) * traj_interpolator_time_fraction;
+    start_time_ = time_sec;
+
+    start_ = false;
+
+    if (first_goal_) {
+      q_start_ = q_start;
+      prev_q_goal_ = q_start;
+      first_goal_ = false;
+      // std::cout << "First goal of the interpolation" << std::endl;
+    } else {
+      prev_q_goal_ = q_goal_;
+      q_start_ = prev_q_goal_;
+    }
+    q_goal_ = q_goal;
+  };
+
+  inline void GetNextStep(const double &time_sec, Vector7d &q_t) {
+    if (!start_) {
+      start_time_ = time_sec;
+      last_q_t_ = q_start_;
+      start_ = true;
+    }
+    // std::cout << q_start_.transpose() << " | " << q_goal_.transpose() <<
+    // std::endl;
+    if (last_time_ + dt_ <= time_sec) {
+      double t =
+          std::min(std::max((time_sec - start_time_) / max_time_, 0.), 1.);
+      last_q_t_ = q_start_ + t * (q_goal_ - q_start_);
+      last_time_ = time_sec;
+    }
+    q_t = last_q_t_;
+  };
+};
 }  // namespace common
 }  // namespace rcs
 #endif  // DEOXYS_FRANKA_INTERFACE_INCLUDE_UTILS_TRAJ_INTERPOLATORS_POSE_TRAJ_INTERPOLATOR_H_
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index bfa7f09a..af7a34d4 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -140,11 +140,41 @@ void TorqueSafetyGuardFn(std::array<double, 7> &tau_d_array, double min_torque,
   }
 }
 
-void FR3::osc2_set_cartesian_position(
-    const common::Pose &desired_pose_EE_in_base_frame) {
 
-  // auto pose = this->get_cartesian_position();
+void FR3::controller_set_joint_position(
+    const common::Vector7d &desired_q) {
+
+  // from deoxys/config/osc-position-controller.yml
+  double traj_interpolation_time_fraction = 0.3; // in s
+  // form deoxys/config/charmander.yml
+  int policy_rate = 20;
+  int traj_rate = 500;
+
+  if (!this->control_thread_running) {
+    this->controller_time = 0.0;
+    this->curr_joint_position = this->get_joint_position();
+  } else {
+    this->joint_interpolator_mutex.lock();
+  }
+
+  this->joint_interpolator.Reset(
+      this->controller_time, this->curr_joint_position, desired_q, policy_rate, traj_rate,
+      traj_interpolation_time_fraction);
+
+  // if not thread is running, then start
+  if (!this->control_thread_running) {
+    this->control_thread_running = true;
+    this->control_thread = std::thread(&FR3::joint_controller, this);
+  } else {
+    this->joint_interpolator_mutex.unlock();
+  }
+}
 
+// todos
+// - controller type
+// - joint type
+void FR3::osc2_set_cartesian_position(
+    const common::Pose &desired_pose_EE_in_base_frame) {
   // from deoxys/config/osc-position-controller.yml
   double traj_interpolation_time_fraction = 0.3;
   // form deoxys/config/charmander.yml
@@ -159,9 +189,10 @@ void FR3::osc2_set_cartesian_position(
   }
 
   this->traj_interpolator.Reset(
-      this->controller_time, this->curr_pose.translation(), this->curr_pose.quaternion(),
-      desired_pose_EE_in_base_frame.translation(), desired_pose_EE_in_base_frame.quaternion(),
-      policy_rate, traj_rate, traj_interpolation_time_fraction);
+      this->controller_time, this->curr_pose.translation(),
+      this->curr_pose.quaternion(), desired_pose_EE_in_base_frame.translation(),
+      desired_pose_EE_in_base_frame.quaternion(), policy_rate, traj_rate,
+      traj_interpolation_time_fraction);
 
   // if not thread is running, then start
   if (!this->control_thread_running) {
@@ -195,9 +226,10 @@ void FR3::osc_set_cartesian_position(
   }
 
   this->traj_interpolator.Reset(
-      this->controller_time, this->curr_pose.translation(), this->curr_pose.quaternion(),
-      desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
-      policy_rate, traj_rate, traj_interpolation_time_fraction);
+      this->controller_time, this->curr_pose.translation(),
+      this->curr_pose.quaternion(), desired_pos_EE_in_base_frame,
+      fixed_desired_quat_EE_in_base_frame, policy_rate, traj_rate,
+      traj_interpolation_time_fraction);
 
   // if not thread is running, then start
   if (!this->control_thread_running) {
@@ -481,7 +513,6 @@ void FR3::osc2() {
   // The manual residual mass matrix to add on the internal mass matrix
   residual_mass_vec_ << 0.0, 0.0, 0.0, 0.0, 0.1, 0.5, 0.5;
 
-
   joint_max_ << 2.8978, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973;
   joint_min_ << -2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973;
   avoidance_weights_ << 1., 1., 1., 1., 1., 10., 10.;
@@ -493,6 +524,7 @@ void FR3::osc2() {
 
     // torques handler
     if (!this->control_thread_running) {
+      // TODO: test if this also works when the robot is moving
       franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
       return franka::MotionFinished(zero_torques);
     }
@@ -512,7 +544,6 @@ void FR3::osc2() {
     int policy_rate = 20;
     int traj_rate = 500;
 
-
     this->traj_interpolator_mutex.lock();
     // if (this->controller_time == 0) {
     //   this->traj_interpolator.Reset(
@@ -565,7 +596,6 @@ void FR3::osc2() {
     // Joint velocity
     Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
 
-
     if (desired_quat_EE_in_base_frame.coeffs().dot(
             quat_EE_in_base_frame.coeffs()) < 0.0) {
       quat_EE_in_base_frame.coeffs() << -quat_EE_in_base_frame.coeffs();
@@ -660,6 +690,98 @@ void FR3::osc2() {
   });
 }
 
+void FR3::joint_controller() {
+  franka::Model model = this->robot.loadModel();
+  this->controller_time = 0.0;
+  this->robot.setCollisionBehavior(
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+
+
+  // deoxys/config/joint-impedance-controller.yml
+  common::Vector7d Kp;
+  Kp << 100., 100., 100., 100., 75., 150., 50.;
+
+  common::Vector7d Kd;
+  Kd << 20., 20., 20., 20., 7.5, 15.0, 5.0;
+ 
+
+  Eigen::Array<double, 7, 1> joint_max_;
+  Eigen::Array<double, 7, 1> joint_min_;
+
+
+  joint_max_ << 2.8978, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973;
+  joint_min_ << -2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973;
+
+  this->robot.control([&](const franka::RobotState &robot_state,
+                          franka::Duration period) -> franka::Torques {
+    std::chrono::high_resolution_clock::time_point t1 =
+        std::chrono::high_resolution_clock::now();
+
+
+    // torques handler
+    if (!this->control_thread_running) {
+      // TODO: test if this also works when the robot is moving
+      franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
+      return franka::MotionFinished(zero_torques);
+    }
+
+
+    common::Vector7d desired_q;
+
+    common::Pose pose(robot_state.O_T_EE);
+
+    this->joint_interpolator_mutex.lock();
+    this->controller_time += period.toSec();
+    this->joint_interpolator.GetNextStep(this->controller_time,
+                                        desired_q);
+    this->joint_interpolator_mutex.unlock();
+    // end torques handler
+
+    Eigen::Matrix<double, 7, 1> tau_d;
+
+    std::array<double, 49> mass_array = model.mass(robot_state);
+    Eigen::Map<Eigen::Matrix<double, 7, 7>> M(mass_array.data());
+
+    // coriolis and gravity
+    std::array<double, 7> coriolis_array = model.coriolis(robot_state);
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> coriolis(
+        coriolis_array.data());
+
+    // Current joint velocity
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
+
+    // Current joint position
+    Eigen::Map<const Eigen::Matrix<double, 7, 1>> q(robot_state.q.data());
+
+    Eigen::MatrixXd joint_pos_error(7, 1);
+
+    Eigen::Matrix<double, 7, 1> current_q, current_dq;
+
+    joint_pos_error << desired_q - q;
+
+    tau_d << Kp.cwiseProduct(joint_pos_error) - Kd.cwiseProduct(current_dq);
+
+    Eigen::Matrix<double, 7, 1> dist2joint_max;
+    Eigen::Matrix<double, 7, 1> dist2joint_min;
+
+    dist2joint_max = joint_max_.matrix() - current_q;
+    dist2joint_min = current_q - joint_min_.matrix();
+
+    for (int i = 0; i < 7; i++) {
+      if (dist2joint_max[i] < 0.1 && tau_d[i] > 0.) tau_d[i] = 0.;
+      if (dist2joint_min[i] < 0.1 && tau_d[i] < 0.) tau_d[i] = 0.;
+    }
+
+    std::array<double, 7> tau_d_array{};
+    Eigen::VectorXd::Map(&tau_d_array[0], 7) = tau_d;
+
+    return tau_d_array;
+  });
+}
+
 void FR3::zero_torque() {
   // start thread
 
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 198f1da2..53f5cd1c 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -55,7 +55,12 @@ class FR3 : public common::Robot {
   common::LinearPoseTrajInterpolator traj_interpolator;
   std::mutex traj_interpolator_mutex;
   double controller_time = 0.0;
-  common::Pose curr_pose = common::Pose::Identity();
+  common::Pose curr_pose;
+
+
+  common::LinearJointPositionTrajInterpolator joint_interpolator;
+  std::mutex joint_interpolator_mutex;
+  common::Vector7d curr_joint_position;
 
   bool control_thread_running = false;
 
@@ -81,6 +86,7 @@ class FR3 : public common::Robot {
 
   void set_guiding_mode(bool enabled);
 
+  void controller_set_joint_position(const common::Vector7d &desired_q);
   void osc_set_cartesian_position(const Eigen::Vector3d &desired_pos_EE_in_base_frame);
   void osc2_set_cartesian_position(const common::Pose &desired_pose_EE_in_base_frame);
 
@@ -88,6 +94,7 @@ class FR3 : public common::Robot {
 
   void osc();
   void osc2();
+  void joint_controller();
 
   void zero_torque();
 
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index ea8dee81..af285756 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -363,6 +363,7 @@ PYBIND11_MODULE(_core, m) {
            py::arg("enabled"))
       .def("zero_torque", &rcs::hw::FR3::zero_torque)
       .def("osc_set_cartesian_position", &rcs::hw::FR3::osc_set_cartesian_position, py::arg("desired_pos_EE_in_base_frame"))
+      .def("controller_set_joint_position", &rcs::hw::FR3::controller_set_joint_position, py::arg("desired_q"))
       .def("osc2_set_cartesian_position", &rcs::hw::FR3::osc2_set_cartesian_position, py::arg("desired_pose_EE_in_base_frame"))
       .def("stop_control_thread", &rcs::hw::FR3::stop_control_thread)
       .def("automatic_error_recovery", &rcs::hw::FR3::automatic_error_recovery)

From 8603b0b1aa973cc8c8ceca939a93ede439c4640b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 21 Mar 2025 13:45:21 +0100
Subject: [PATCH 04/56] fixed joint space controller

---
 src/hw/FR3.cpp | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index af7a34d4..f7a5945c 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -742,13 +742,6 @@ void FR3::joint_controller() {
 
     Eigen::Matrix<double, 7, 1> tau_d;
 
-    std::array<double, 49> mass_array = model.mass(robot_state);
-    Eigen::Map<Eigen::Matrix<double, 7, 7>> M(mass_array.data());
-
-    // coriolis and gravity
-    std::array<double, 7> coriolis_array = model.coriolis(robot_state);
-    Eigen::Map<const Eigen::Matrix<double, 7, 1>> coriolis(
-        coriolis_array.data());
 
     // Current joint velocity
     Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
@@ -758,17 +751,16 @@ void FR3::joint_controller() {
 
     Eigen::MatrixXd joint_pos_error(7, 1);
 
-    Eigen::Matrix<double, 7, 1> current_q, current_dq;
 
     joint_pos_error << desired_q - q;
 
-    tau_d << Kp.cwiseProduct(joint_pos_error) - Kd.cwiseProduct(current_dq);
+    tau_d << Kp.cwiseProduct(joint_pos_error) - Kd.cwiseProduct(dq);
 
     Eigen::Matrix<double, 7, 1> dist2joint_max;
     Eigen::Matrix<double, 7, 1> dist2joint_min;
 
-    dist2joint_max = joint_max_.matrix() - current_q;
-    dist2joint_min = current_q - joint_min_.matrix();
+    dist2joint_max = joint_max_.matrix() - q;
+    dist2joint_min = q - joint_min_.matrix();
 
     for (int i = 0; i < 7; i++) {
       if (dist2joint_max[i] < 0.1 && tau_d[i] > 0.) tau_d[i] = 0.;

From 4a9ef04c26191f3c41017796bfce77ce62b7156e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 24 Mar 2025 17:32:49 +0100
Subject: [PATCH 05/56] fix: lock usgage, controllers, remove unused code

- remove osc2
- only one lock
- fixed get joint and cartesian for controllers
---
 src/hw/FR3.cpp       | 314 +++++--------------------------------------
 src/hw/FR3.h         |  13 +-
 src/pybind/rcsss.cpp |   1 -
 3 files changed, 40 insertions(+), 288 deletions(-)

diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index f7a5945c..a0125047 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -86,8 +86,15 @@ void FR3::set_default_robot_behavior() {
 }
 
 common::Pose FR3::get_cartesian_position() {
-  franka::RobotState state = this->robot.readOnce();
-  common::Pose x(state.O_T_EE);
+  common::Pose x;
+  if (!this->control_thread_running) {
+    this->curr_state = this->robot.readOnce();
+    x = common::Pose(this->curr_state.O_T_EE);
+  } else {
+    this->interpolator_mutex.lock();
+    x = common::Pose(this->curr_state.O_T_EE);
+    this->interpolator_mutex.unlock();
+  }
   return x;
 }
 
@@ -98,8 +105,15 @@ void FR3::set_joint_position(const common::Vector7d &q) {
 }
 
 common::Vector7d FR3::get_joint_position() {
-  franka::RobotState state = this->robot.readOnce();
-  common::Vector7d joints(state.q.data());
+  common::Vector7d joints;
+  if (!this->control_thread_running) {
+    this->curr_state = this->robot.readOnce();
+    joints = common::Vector7d(this->curr_state.q.data());
+  } else {
+    this->interpolator_mutex.lock();
+    joints = common::Vector7d(this->curr_state.q.data());
+    this->interpolator_mutex.unlock();
+  }
   return joints;
 }
 
@@ -152,13 +166,13 @@ void FR3::controller_set_joint_position(
 
   if (!this->control_thread_running) {
     this->controller_time = 0.0;
-    this->curr_joint_position = this->get_joint_position();
+    this->get_joint_position();
   } else {
-    this->joint_interpolator_mutex.lock();
+    this->interpolator_mutex.lock();
   }
 
   this->joint_interpolator.Reset(
-      this->controller_time, this->curr_joint_position, desired_q, policy_rate, traj_rate,
+      this->controller_time, Eigen::Map<common::Vector7d>(this->curr_state.q.data()), desired_q, policy_rate, traj_rate,
       traj_interpolation_time_fraction);
 
   // if not thread is running, then start
@@ -166,14 +180,14 @@ void FR3::controller_set_joint_position(
     this->control_thread_running = true;
     this->control_thread = std::thread(&FR3::joint_controller, this);
   } else {
-    this->joint_interpolator_mutex.unlock();
+    this->interpolator_mutex.unlock();
   }
 }
 
 // todos
 // - controller type
 // - joint type
-void FR3::osc2_set_cartesian_position(
+void FR3::osc_set_cartesian_position(
     const common::Pose &desired_pose_EE_in_base_frame) {
   // from deoxys/config/osc-position-controller.yml
   double traj_interpolation_time_fraction = 0.3;
@@ -183,63 +197,28 @@ void FR3::osc2_set_cartesian_position(
 
   if (!this->control_thread_running) {
     this->controller_time = 0.0;
-    this->curr_pose = this->get_cartesian_position();
+    this->get_cartesian_position();
   } else {
-    this->traj_interpolator_mutex.lock();
+    this->interpolator_mutex.lock();
   }
 
+  common::Pose curr_pose(this->curr_state.O_T_EE);
   this->traj_interpolator.Reset(
-      this->controller_time, this->curr_pose.translation(),
-      this->curr_pose.quaternion(), desired_pose_EE_in_base_frame.translation(),
+      this->controller_time, curr_pose.translation(),
+      curr_pose.quaternion(), desired_pose_EE_in_base_frame.translation(),
       desired_pose_EE_in_base_frame.quaternion(), policy_rate, traj_rate,
       traj_interpolation_time_fraction);
 
-  // if not thread is running, then start
-  if (!this->control_thread_running) {
-    this->control_thread_running = true;
-    this->control_thread = std::thread(&FR3::osc2, this);
-  } else {
-    this->traj_interpolator_mutex.unlock();
-  }
-}
-
-void FR3::osc_set_cartesian_position(
-    const Eigen::Vector3d &desired_pos_EE_in_base_frame) {
-  // this->osc_desired_pos_EE_in_base_frame_mutex.lock();
-  // this->osc_desired_pos_EE_in_base_frame = desired_pos_EE_in_base_frame;
-  // this->osc_desired_pos_EE_in_base_frame_mutex.unlock();
-
-  Eigen::Quaterniond fixed_desired_quat_EE_in_base_frame(0., 1., 0., 0.);
-
-  // from deoxys/config/osc-position-controller.yml
-  double traj_interpolation_time_fraction = 0.3;
-  // form deoxys/config/charmander.yml
-  int policy_rate = 20;
-  int traj_rate = 500;
-
-  if (!this->control_thread_running) {
-    this->controller_time = 0.0;
-    // auto pose = this->get_cartesian_position();
-    this->curr_pose = this->get_cartesian_position();
-  } else {
-    this->traj_interpolator_mutex.lock();
-  }
-
-  this->traj_interpolator.Reset(
-      this->controller_time, this->curr_pose.translation(),
-      this->curr_pose.quaternion(), desired_pos_EE_in_base_frame,
-      fixed_desired_quat_EE_in_base_frame, policy_rate, traj_rate,
-      traj_interpolation_time_fraction);
-
   // if not thread is running, then start
   if (!this->control_thread_running) {
     this->control_thread_running = true;
     this->control_thread = std::thread(&FR3::osc, this);
   } else {
-    this->traj_interpolator_mutex.unlock();
+    this->interpolator_mutex.unlock();
   }
 }
 
+
 // method to stop thread
 void FR3::stop_control_thread() {
   if (this->control_thread.has_value() && this->control_thread_running) {
@@ -270,228 +249,6 @@ void FR3::osc() {
   // translation: [150.0, 150.0, 150.0]
   // rotation: 250.0
 
-  Eigen::Matrix<double, 3, 3> Kp_p, Kp_r, Kd_p, Kd_r;
-  Eigen::Matrix<double, 7, 1> static_q_task_;
-  // Eigen::Vector3d prev_goal_pos_EE_in_base_frame;
-  Eigen::Matrix<double, 7, 1> residual_mass_vec_;
-
-  // values from deoxys/config/osc-position-controller.yml
-  Kp_p.diagonal() << 150, 150, 150;
-  Kp_r.diagonal() << 250, 250, 250;
-
-  Kd_p << Kp_p.cwiseSqrt() * 2.0;
-  Kd_r << Kp_r.cwiseSqrt() * 2.0;
-
-  static_q_task_ << 0.09017809387254755, -0.9824203501652151,
-      0.030509718397568178, -2.694229634937343, 0.057700675144720104,
-      1.860298714876101, 0.8713759453244422;
-
-  // The manual residual mass matrix to add on the internal mass matrix
-  residual_mass_vec_ << 0.0, 0.0, 0.0, 0.0, 0.1, 0.5, 0.5;
-
-  this->robot.control([&](const franka::RobotState &robot_state,
-                          franka::Duration period) -> franka::Torques {
-    std::chrono::high_resolution_clock::time_point t1 =
-        std::chrono::high_resolution_clock::now();
-
-    // torques handler
-    if (!this->control_thread_running) {
-      franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
-      return franka::MotionFinished(zero_torques);
-    }
-    // TO BE replaced
-    // if (!this->control_thread_running && dq.maxCoeff() < 0.0001) {
-    //   return franka::MotionFinished(franka::Torques(tau_d_array));
-    // }
-
-    Eigen::Vector3d desired_pos_EE_in_base_frame;
-    Eigen::Quaterniond desired_quat_EE_in_base_frame;
-
-    common::Pose pose(robot_state.O_T_EE);
-    // auto pose = this->get_cartesian_position();
-    // from deoxys/config/osc-position-controller.yml
-    double traj_interpolation_time_fraction = 0.3;
-    // form deoxys/config/charmander.yml
-    int policy_rate = 20;
-    int traj_rate = 500;
-
-    // w, x, y, z
-    // TODO: change this non fixed?
-    Eigen::Quaterniond fixed_desired_quat_EE_in_base_frame(0., 1., 0., 0.);
-
-    this->traj_interpolator_mutex.lock();
-    // if (this->controller_time == 0) {
-    //   this->traj_interpolator.Reset(
-    //       0., pose.translation(), pose.quaternion(),
-    //       desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
-    //       policy_rate, traj_rate, traj_interpolation_time_fraction);
-    // }
-    this->curr_pose = common::Pose(robot_state.O_T_EE);
-    this->controller_time += period.toSec();
-    this->traj_interpolator.GetNextStep(this->controller_time,
-                                        desired_pos_EE_in_base_frame,
-                                        desired_quat_EE_in_base_frame);
-    this->traj_interpolator_mutex.unlock();
-
-    // end torques handler
-
-    Eigen::Matrix<double, 7, 1> tau_d;
-
-    std::array<double, 49> mass_array = model.mass(robot_state);
-
-    Eigen::Map<Eigen::Matrix<double, 7, 7>> M(mass_array.data());
-
-    M = M + Eigen::Matrix<double, 7, 7>(residual_mass_vec_.asDiagonal());
-
-    // coriolis and gravity
-    std::array<double, 7> coriolis_array = model.coriolis(robot_state);
-    Eigen::Map<const Eigen::Matrix<double, 7, 1>> coriolis(
-        coriolis_array.data());
-
-    std::array<double, 7> gravity_array = model.gravity(robot_state);
-    Eigen::Map<const Eigen::Matrix<double, 7, 1>> gravity(gravity_array.data());
-
-    std::array<double, 42> jacobian_array =
-        model.zeroJacobian(franka::Frame::kEndEffector, robot_state);
-    Eigen::Map<const Eigen::Matrix<double, 6, 7>> jacobian(
-        jacobian_array.data());
-
-    Eigen::MatrixXd jacobian_pos(3, 7);
-    Eigen::MatrixXd jacobian_ori(3, 7);
-    jacobian_pos << jacobian.block(0, 0, 3, 7);
-    jacobian_ori << jacobian.block(3, 0, 3, 7);
-
-    // End effector pose in base frame
-    Eigen::Affine3d T_EE_in_base_frame(
-        Eigen::Matrix4d::Map(robot_state.O_T_EE.data()));
-    Eigen::Vector3d pos_EE_in_base_frame(T_EE_in_base_frame.translation());
-    Eigen::Quaterniond quat_EE_in_base_frame(T_EE_in_base_frame.linear());
-
-    // Joint velocity
-    Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
-
-    // Nullspace goal
-    Eigen::Map<const Eigen::Matrix<double, 7, 1>> q(robot_state.q.data());
-
-    // Update the specified state estimator
-    // if (this->state_estimator_ptr_->IsFirstState()) {
-    //   this->state_estimator_ptr_->Initialize(q, dq, pos_EE_in_base_frame,
-    //                                          quat_EE_in_base_frame);
-    // } else {
-    //   this->state_estimator_ptr_->Update(q, dq, pos_EE_in_base_frame,
-    //                                      quat_EE_in_base_frame);
-    // }
-
-    Eigen::Matrix<double, 7, 1> current_q, current_dq;
-    // Get state from a specified state estimator
-    // current_q = this->state_estimator_ptr_->GetCurrentJointPos();
-    // current_dq = this->state_estimator_ptr_->GetCurrentJointVel();
-    current_q = q;    // this->state_estimator_ptr_->GetCurrentJointPos();
-    current_dq = dq;  // this->state_estimator_ptr_->GetCurrentJointVel();
-
-    // Get eef states from a specified state estimator
-    // pos_EE_in_base_frame = this->state_estimator_ptr_->GetCurrentEEFPos();
-    // quat_EE_in_base_frame = this->state_estimator_ptr_->GetCurrentEEFQuat();
-
-    if (fixed_desired_quat_EE_in_base_frame.coeffs().dot(
-            quat_EE_in_base_frame.coeffs()) < 0.0) {
-      quat_EE_in_base_frame.coeffs() << -quat_EE_in_base_frame.coeffs();
-    }
-
-    Eigen::Vector3d pos_error;
-
-    pos_error << desired_pos_EE_in_base_frame - pos_EE_in_base_frame;
-
-    Eigen::Quaterniond quat_error(
-        fixed_desired_quat_EE_in_base_frame.inverse() * quat_EE_in_base_frame);
-    Eigen::Vector3d ori_error;
-    ori_error << quat_error.x(), quat_error.y(), quat_error.z();
-    ori_error << -T_EE_in_base_frame.linear() * ori_error;
-
-    // Compute matrices
-    Eigen::Matrix<double, 7, 7> M_inv(M.inverse());
-    Eigen::MatrixXd Lambda_inv(6, 6);
-    Lambda_inv << jacobian * M_inv * jacobian.transpose();
-    Eigen::MatrixXd Lambda(6, 6);
-    PInverse(Lambda_inv, Lambda);
-
-    Eigen::Matrix<double, 7, 6> J_inv;
-    J_inv << M_inv * jacobian.transpose() * Lambda;
-    Eigen::Matrix<double, 7, 7> Nullspace;
-    Nullspace << Eigen::MatrixXd::Identity(7, 7) -
-                     jacobian.transpose() * J_inv.transpose();
-
-    // Decoupled mass matrices
-    Eigen::MatrixXd Lambda_pos_inv(3, 3);
-    Lambda_pos_inv << jacobian_pos * M_inv * jacobian_pos.transpose();
-    Eigen::MatrixXd Lambda_ori_inv(3, 3);
-    Lambda_ori_inv << jacobian_ori * M_inv * jacobian_ori.transpose();
-
-    Eigen::MatrixXd Lambda_pos(3, 3);
-    Eigen::MatrixXd Lambda_ori(3, 3);
-    PInverse(Lambda_pos_inv, Lambda_pos);
-    PInverse(Lambda_ori_inv, Lambda_ori);
-
-    pos_error =
-        pos_error.unaryExpr([](double x) { return (abs(x) < 1e-4) ? 0. : x; });
-    ori_error =
-        ori_error.unaryExpr([](double x) { return (abs(x) < 5e-3) ? 0. : x; });
-
-    tau_d << jacobian_pos.transpose() *
-                     (Lambda_pos *
-                      (Kp_p * pos_error - Kd_p * (jacobian_pos * current_dq))) +
-                 jacobian_ori.transpose() *
-                     (Lambda_ori *
-                      (Kp_r * ori_error - Kd_r * (jacobian_ori * current_dq)));
-
-    // compensation - Try low gain with compensation?
-    // tau_d << tau_d + coriolis;
-    // tau_d << tau_d + gravity;
-    // nullspace control
-    tau_d << tau_d + Nullspace * (static_q_task_ - current_q);
-    // std::cout << "Nullspace : " << (Nullspace * (static_q_task_ -
-    // q)).transpose() << std::endl;
-
-    std::array<double, 7> tau_d_array{};
-    Eigen::VectorXd::Map(&tau_d_array[0], 7) = tau_d;
-
-    std::chrono::high_resolution_clock::time_point t2 =
-        std::chrono::high_resolution_clock::now();
-    auto time = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
-    // std::cout << "OSC step took: " << time.count() << " ms"  << std::endl;
-
-    std::array<double, 7> tau_d_rate_limited = franka::limitRate(
-        franka::kMaxTorqueRate, tau_d_array, robot_state.tau_J_d);
-
-    // deoxys/config/control_config.yml
-    double min_torque = -5;
-    double max_torque = 5;
-    TorqueSafetyGuardFn(tau_d_rate_limited, min_torque, max_torque);
-
-    return tau_d_rate_limited;
-  });
-}
-
-void FR3::osc2() {
-  franka::Model model = this->robot.loadModel();
-
-  this->controller_time = 0.0;
-
-  this->robot.setCollisionBehavior(
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
-
-  // from bench mark
-  // ([150.0, 150.0, 60.0], 250.0), // kp_translation, kp_rotation
-  // ([60.0, 150.0, 150.0], 250.0), // kd_translation, kd_rotation
-
-  // from config file
-  // Kp:
-  // translation: [150.0, 150.0, 150.0]
-  // rotation: 250.0
-
   Eigen::Matrix<double, 3, 3> Kp_p, Kp_r, Kd_p, Kd_r;
   Eigen::Matrix<double, 7, 1> static_q_task_;
   Eigen::Matrix<double, 7, 1> residual_mass_vec_;
@@ -544,18 +301,19 @@ void FR3::osc2() {
     int policy_rate = 20;
     int traj_rate = 500;
 
-    this->traj_interpolator_mutex.lock();
+    this->interpolator_mutex.lock();
     // if (this->controller_time == 0) {
     //   this->traj_interpolator.Reset(
     //       0., pose.translation(), pose.quaternion(),
     //       desired_pos_EE_in_base_frame, fixed_desired_quat_EE_in_base_frame,
     //       policy_rate, traj_rate, traj_interpolation_time_fraction);
     // }
+    this->curr_state = robot_state;
     this->controller_time += period.toSec();
     this->traj_interpolator.GetNextStep(this->controller_time,
                                         desired_pos_EE_in_base_frame,
                                         desired_quat_EE_in_base_frame);
-    this->traj_interpolator_mutex.unlock();
+    this->interpolator_mutex.unlock();
 
     // end torques handler
 
@@ -730,14 +488,14 @@ void FR3::joint_controller() {
 
 
     common::Vector7d desired_q;
-
     common::Pose pose(robot_state.O_T_EE);
 
-    this->joint_interpolator_mutex.lock();
+    this->interpolator_mutex.lock();
+    this->curr_state = robot_state;
     this->controller_time += period.toSec();
     this->joint_interpolator.GetNextStep(this->controller_time,
                                         desired_q);
-    this->joint_interpolator_mutex.unlock();
+    this->interpolator_mutex.unlock();
     // end torques handler
 
     Eigen::Matrix<double, 7, 1> tau_d;
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 53f5cd1c..5f1432a6 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -50,19 +50,16 @@ class FR3 : public common::Robot {
   FR3Config cfg;
   std::optional<std::shared_ptr<common::IK>> m_ik;
   std::optional<std::thread> control_thread = std::nullopt;
-  // std::optional<Eigen::Vector3d> osc_desired_pos_EE_in_base_frame = std::nullopt;
-  // std::mutex osc_desired_pos_EE_in_base_frame_mutex;
   common::LinearPoseTrajInterpolator traj_interpolator;
-  std::mutex traj_interpolator_mutex;
   double controller_time = 0.0;
-  common::Pose curr_pose;
 
 
   common::LinearJointPositionTrajInterpolator joint_interpolator;
-  std::mutex joint_interpolator_mutex;
-  common::Vector7d curr_joint_position;
+
+  franka::RobotState curr_state;
 
   bool control_thread_running = false;
+  std::mutex interpolator_mutex;
 
  public:
   FR3(const std::string &ip,
@@ -87,13 +84,11 @@ class FR3 : public common::Robot {
   void set_guiding_mode(bool enabled);
 
   void controller_set_joint_position(const common::Vector7d &desired_q);
-  void osc_set_cartesian_position(const Eigen::Vector3d &desired_pos_EE_in_base_frame);
-  void osc2_set_cartesian_position(const common::Pose &desired_pose_EE_in_base_frame);
+  void osc_set_cartesian_position(const common::Pose &desired_pose_EE_in_base_frame);
 
   void stop_control_thread();
 
   void osc();
-  void osc2();
   void joint_controller();
 
   void zero_torque();
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index af285756..73ac7a6b 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -364,7 +364,6 @@ PYBIND11_MODULE(_core, m) {
       .def("zero_torque", &rcs::hw::FR3::zero_torque)
       .def("osc_set_cartesian_position", &rcs::hw::FR3::osc_set_cartesian_position, py::arg("desired_pos_EE_in_base_frame"))
       .def("controller_set_joint_position", &rcs::hw::FR3::controller_set_joint_position, py::arg("desired_q"))
-      .def("osc2_set_cartesian_position", &rcs::hw::FR3::osc2_set_cartesian_position, py::arg("desired_pose_EE_in_base_frame"))
       .def("stop_control_thread", &rcs::hw::FR3::stop_control_thread)
       .def("automatic_error_recovery", &rcs::hw::FR3::automatic_error_recovery)
       .def("double_tap_robot_to_continue",

From daebcb31462de1b971fbd0e3d87b0b6929b6143a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 10:37:32 +0100
Subject: [PATCH 06/56] updated stubs and added controllers to gym env

---
 python/rcsss/_core/hw/__init__.pyi |  5 +----
 python/rcsss/envs/base.py          | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index 966be7a9..f38dc9f9 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -55,10 +55,7 @@ class FR3(rcsss._core.common.Robot):
     def double_tap_robot_to_continue(self) -> None: ...
     def get_parameters(self) -> FR3Config: ...
     def get_state(self) -> FR3State: ...
-    def osc2_set_cartesian_position(self, desired_pose_EE_in_base_frame: rcsss._core.common.Pose) -> None: ...
-    def osc_set_cartesian_position(
-        self, desired_pos_EE_in_base_frame: numpy.ndarray[typing.Literal[3], numpy.dtype[numpy.float64]]
-    ) -> None: ...
+    def osc_set_cartesian_position(self, desired_pos_EE_in_base_frame: rcsss._core.common.Pose) -> None: ...
     def set_cartesian_position_ik(
         self, pose: rcsss._core.common.Pose, max_time: float, elbow: float | None, max_force: float | None = 5
     ) -> None: ...
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 1724998b..cd15f2d3 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -8,6 +8,7 @@
 import gymnasium as gym
 import numpy as np
 from rcsss import common, sim
+from rcsss._core import hw
 from rcsss.camera.interface import BaseCameraSet
 from rcsss.envs.space_utils import (
     ActObsInfoWrapper,
@@ -159,7 +160,7 @@ class FR3Env(gym.Env):
     y
     """
 
-    def __init__(self, robot: common.Robot, control_mode: ControlMode):
+    def __init__(self, robot: hw.FR3, control_mode: ControlMode):
         self.robot = robot
         self._control_mode_overrides = [control_mode]
         self.action_space: gym.spaces.Dict
@@ -223,7 +224,8 @@ def step(self, action: CartOrJointContType) -> tuple[ArmObsType, float, bool, bo
             or not np.allclose(action_dict[self.joints_key], self.prev_action[self.joints_key], atol=1e-03, rtol=0)
         ):
             # cast is needed because typed dicts cannot be checked at runtime
-            self.robot.set_joint_position(action_dict[self.joints_key])
+            # self.robot.set_joint_position(action_dict[self.joints_key])
+            self.robot.controller_set_joint_position(action_dict[self.joints_key])
         elif self.get_base_control_mode() == ControlMode.CARTESIAN_TRPY and (
             self.prev_action is None
             or not np.allclose(action_dict[self.trpy_key], self.prev_action[self.trpy_key], atol=1e-03, rtol=0)
@@ -235,7 +237,10 @@ def step(self, action: CartOrJointContType) -> tuple[ArmObsType, float, bool, bo
             self.prev_action is None
             or not np.allclose(action_dict[self.tquart_key], self.prev_action[self.tquart_key], atol=1e-03, rtol=0)
         ):
-            self.robot.set_cartesian_position(
+            # self.robot.set_cartesian_position(
+            #     common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
+            # )
+            self.robot.osc_set_cartesian_position(
                 common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
             )
         self.prev_action = copy.deepcopy(action_dict)
@@ -253,6 +258,10 @@ def reset(
         self.robot.reset()
         return self.get_obs(), {}
 
+    def close(self):
+        self.robot.stop_control_thread()
+        super().close()
+
 
 class RelativeTo(Enum):
     LAST_STEP = auto()

From 84d7a4afb6f972f996b04f045af0bb95b5174953 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 14:08:12 +0100
Subject: [PATCH 07/56] feat: updated vive teleop to use controls and data
 recording

---
 pyproject.toml                |   3 +-
 python/rcsss/control/vive.py  | 172 +++++++++++++++++++++++-----------
 python/rcsss/envs/wrappers.py | 155 ++++++++++++++++++++++++++++++
 3 files changed, 275 insertions(+), 55 deletions(-)
 create mode 100644 python/rcsss/envs/wrappers.py

diff --git a/pyproject.toml b/pyproject.toml
index f2fde75e..e0eba297 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,8 @@ dependencies = ["websockets>=11.0",
     # NOTE: when changing the mujoco version, also change it in requirements_dev.txt
     "mujoco==3.2.6",
     # NOTE: Same for pinocchio (pin)
-    "pin==2.7.0"
+    "pin==2.7.0",
+    "flatten-dict==0.4.2",
 ]
 readme = "README.md"
 maintainers = [
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 549b17a7..f21433ae 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -1,12 +1,16 @@
 import logging
+import sys
 import threading
-import typing
 from enum import IntFlag, auto
 from socket import AF_INET, SOCK_DGRAM, socket
-from struct import pack, unpack
+from struct import unpack
+from time import sleep
+from rcsss.camera.realsense import RealSenseCameraSet
 
 import numpy as np
 from rcsss._core.common import RPY, Pose
+from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
+from rcsss.control.utils import load_creds_fr3_desk
 from rcsss.envs.base import (
     ControlMode,
     GripperDictType,
@@ -21,21 +25,23 @@
     default_fr3_sim_gripper_cfg,
     default_fr3_sim_robot_cfg,
     default_mujoco_cameraset_cfg,
+    default_realsense,
     fr3_hw_env,
     fr3_sim_env,
 )
-
-# import matplotlib.pyplot as plt
+from rcsss.envs.wrappers import StorageWrapper
 
 
 logger = logging.getLogger(__name__)
 
 EGO_LOCK = False
-VIVE_HOST = "192.168.100.1"
+VIVE_HOST = "192.168.99.1"
 VIVE_PORT = 54321
+
 INCLUDE_ROTATION = True
 ROBOT_IP = "192.168.101.1"
 ROBOT_INSTANCE = RobotInstance.HARDWARE
+DEBUG = True
 
 
 class Button(IntFlag):
@@ -49,9 +55,10 @@ class Button(IntFlag):
 
 class UDPViveActionServer(threading.Thread):
     # seven doubles and one integer in network byte order
-    FMT = "!" + 7 * "d" + "i"
+    FMT = "!" + 7 * "d" + "i" + 6 * "d"
 
     # base transform from OpenXR coordinate system
+    # transform_from_openxr = Pose(RPY(roll=0.5 * np.pi, yaw=0)) # for second robot
     transform_from_openxr = Pose(RPY(roll=0.5 * np.pi, yaw=np.pi))
 
     def __init__(
@@ -74,6 +81,7 @@ def __init__(
         self._ego_lock = EGO_LOCK
         self._ego_transform = Pose()
         self._env.set_origin_to_current()
+        self._step_env = False
 
     def next_action(self) -> Pose:
         transform = Pose(
@@ -94,14 +102,12 @@ def get_last_controller_pose(self) -> Pose:
     def run(self):
         warning_raised = False
 
-        with socket(AF_INET, SOCK_DGRAM) as sock, socket(AF_INET, SOCK_DGRAM) as send_sock:
+        with socket(AF_INET, SOCK_DGRAM) as sock:
             sock.settimeout(2)
-            sock.setblocking(False)
             sock.bind((self._host, self._port))
-            send_sock.connect(("127.0.0.1", self._port + 1))
             while not self._exit_requested:
                 try:
-                    unpacked = unpack(UDPViveActionServer.FMT, sock.recv(7 * 8 + 4))
+                    unpacked = unpack(UDPViveActionServer.FMT, sock.recv(13 * 8 + 4))
                     if warning_raised:
                         logger.info("[UDP Server] connection reestablished")
                         warning_raised = False
@@ -130,7 +136,7 @@ def run(self):
                             )
 
                             # Compute angle around z axis: https://stackoverflow.com/questions/21483999/using-atan2-to-find-angle-between-two-vectors
-                            rot_z = np.arctan2(x_axis_rot.translation()[1], x_axis_rot.translation()[0]) - np.arctan2(
+                            rot_z = np.atan2(x_axis_rot.translation()[1], x_axis_rot.translation()[0]) - np.atan2(
                                 x_axis.translation()[1], x_axis.translation()[0]
                             )
                             rot_z -= np.pi / 2
@@ -155,19 +161,6 @@ def run(self):
                         # button is pressed
                         self._last_controller_pose = last_controller_pose
 
-                        # plot current offset with liveplot.py
-                        transform = Pose(
-                            translation=self._last_controller_pose.translation() - self._offset_pose.translation(),
-                            quaternion=(self._last_controller_pose * self._offset_pose.inverse()).rotation_q(),
-                        )
-                        offset = (
-                            self._ego_transform
-                            * UDPViveActionServer.transform_from_openxr
-                            * transform
-                            * UDPViveActionServer.transform_from_openxr.inverse()
-                            * self._ego_transform.inverse()
-                        )
-                        send_sock.sendall(pack(UDPViveActionServer.FMT, *offset.rotation_q(), *offset.translation(), 0))
 
                     if Button(int(unpacked[7])) & self._grp_btn and not Button(int(self._buttons)) & self._grp_btn:
                         # just pressed
@@ -189,52 +182,123 @@ def __enter__(self):
     def __exit__(self, *_):
         self.stop()
 
+    def stop_env_loop(self):
+        self._step_env = False
+
     def environment_step_loop(self):
-        while True:
+        self._step_env = True
+        while self._step_env:
+            if self._exit_requested:
+                self._step_env = False
+                break
             with self._resource_lock:
                 displacement = self.next_action()
             action = dict(
                 LimitedTQuartRelDictType(tquart=np.concatenate([displacement.translation(), displacement.rotation_q()]))
             )
+
             action.update(GripperDictType(gripper=self._grp_pos))
+
             with self._env_lock:
+                sleep(0.001)
                 self._env.step(action)
 
 
-def main():
-    # if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-    #     user, pw = load_creds_fr3_desk()
-    #     resource_manger = FCI(Desk(ROBOT_IP, user, pw), unlock=False, lock_when_done=False)
-    # else:
-    #     resource_manger = DummyResourceManager()
+def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet):
+    while True:
+        i = input("> ")
+        match i:
+            case "help":
+                print("You can use `quit` to stop the program, `episode` to start a new episode")
+            case "quit":
+                sys.exit(0)
+            case "episode":
+                # camera_set.clear_buffer()
+                # record videos
+                video_path = env_rel.path / "videos"
+                video_path.mkdir(parents=True, exist_ok=True)
+                print(f'{env_rel.episode_count = }')
+                # camera_set.record_video(video_path, env_rel.episode_count)
+
+                thread = threading.Thread(target=action_server.environment_step_loop)
+                thread.start()
+                input("Robot is being stepped, press enter to finish and save episode.")
+                print("stopping")
+                action_server.stop_env_loop()
+                thread.join()
+                # save
+                # camera_set.stop_video()
+                env_rel.reset()
+                print("videos saved!")
 
-    # with resource_manger:
 
+def main():
     if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-        env_rel = fr3_hw_env(
-            ip=ROBOT_IP,
-            control_mode=ControlMode.CARTESIAN_TQuart,
-            robot_cfg=default_fr3_hw_robot_cfg(),
-            collision_guard="lab",
-            gripper_cfg=default_fr3_hw_gripper_cfg(),
-            max_relative_movement=0.5,
-            relative_to=RelativeTo.CONFIGURED_ORIGIN,
-        )
+        user, pw = load_creds_fr3_desk()
+        resource_manger = FCI(Desk(ROBOT_IP, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True)
     else:
-        env_rel = fr3_sim_env(
-            control_mode=ControlMode.CARTESIAN_TQuart,
-            # control_mode=ControlMode.JOINTS,
-            robot_cfg=default_fr3_sim_robot_cfg(),
-            gripper_cfg=default_fr3_sim_gripper_cfg(),
-            camera_set_cfg=default_mujoco_cameraset_cfg(),
-            max_relative_movement=0.5,
+        resource_manger = DummyResourceManager()
+
+    with resource_manger:
+
+        if ROBOT_INSTANCE == RobotInstance.HARDWARE:
+            camera_dict = {
+                "wrist": "244222071045",
+                # "wrist": "218622278131", # new realsense
+                "bird_eye": "243522070364",
+                # "side": "243522070385",
+                "side": "244222071045",
+            }
+            # camera_set = default_realsense(camera_dict)
+            camera_set = None
+            env_rel = fr3_hw_env(
+                ip=ROBOT_IP,
+                # camera_set = camera_set,
+                # collision_guard="models/scenes/lab/teleop_scene.xml",
+                collision_guard="models/scenes/lab/scene.xml",
+                robot_cfg=default_fr3_hw_robot_cfg(),
+                control_mode=ControlMode.CARTESIAN_TQuart,
+                # control_mode=ControlMode.JOINTS,
+                gripper_cfg=default_fr3_hw_gripper_cfg(),
+                max_relative_movement=(0.5, np.deg2rad(90)),
+                # max_relative_movement=np.deg2rad(90),
             relative_to=RelativeTo.CONFIGURED_ORIGIN,
-        )
-
-    env_rel.reset()
-
-    with UDPViveActionServer(VIVE_HOST, VIVE_PORT, typing.cast(RelativeActionSpace, env_rel)) as action_server:
-        action_server.environment_step_loop()
+            )
+        else:
+            env_rel = fr3_sim_env(
+                control_mode=ControlMode.CARTESIAN_TQuart,
+                # control_mode=ControlMode.JOINTS,
+                robot_cfg=default_fr3_sim_robot_cfg(),
+                collision_guard=False,
+                # mjcf="models/scenes/lab/teleop_scene.xml",
+                gripper_cfg=default_fr3_sim_gripper_cfg(),
+                # camera_set_cfg=default_mujoco_cameraset_cfg(),
+                max_relative_movement=0.5,
+                relative_to=RelativeTo.CONFIGURED_ORIGIN,
+            )
+            env_rel.get_wrapper_attr("sim").open_gui()
+
+        if not DEBUG:
+            env_rel = StorageWrapper(env_rel, path="/home/juelg/code/frankcsy/record_real_christmas")
+            # ip_secondary = "192.168.102.1"
+            # with Desk.fci(ip_secondary, user, pw):
+            #     f = rcsss.hw.FR3(ip_secondary)
+            #     config = rcsss.hw.FR3Config()
+            #     f.set_parameters(config)
+            #     env_rel.get_wrapper_attr("log_files")({
+            #         "camrobot_cart.txt": str(f.get_cartesian_position()),
+            #         "camrobot_joints.txt": str(f.get_joint_position()),
+            #     })
+
+        env_rel.reset()
+
+        with env_rel:
+            with UDPViveActionServer(VIVE_HOST, VIVE_PORT, env_rel) as action_server:
+                if not DEBUG:
+                    input_loop(env_rel, action_server, None)
+
+                else:
+                    action_server.environment_step_loop()
 
 
 if __name__ == "__main__":
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
new file mode 100644
index 00000000..6ae92dbb
--- /dev/null
+++ b/python/rcsss/envs/wrappers.py
@@ -0,0 +1,155 @@
+import copy
+from datetime import datetime
+import os
+from pathlib import Path
+from typing import Any, SupportsFloat
+
+import gymnasium as gym
+
+import os
+
+import gymnasium as gym
+import numpy as np
+from flatten_dict import flatten
+from PIL import Image
+
+class StorageWrapper(gym.Wrapper):
+    # TODO: this should also record the instruction
+    FILE = "episode_{}.npz"
+    GIF = "{}_episode_{}_{}.gif"
+    FOLDER = "experiment_{}"
+    GIF_DURATION_S = 0.5
+
+    def __init__(self, env: gym.Env, path: str, instruction: str | None = None, description: str | None = None, record_numpy: bool = True, gif: bool = True):
+        super().__init__(env)
+        self.episode_count = 0
+        self.step_count = 0
+        self.data = {}
+        self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+        self.record_numpy = record_numpy
+        self.gif = gif
+
+        # make folders
+        self.path = Path(path) / self.FOLDER.format(self.timestamp)
+        Path(self.path).mkdir(parents=True, exist_ok=False)
+        if description is None:
+            # write a small description from input into file
+            description = input("Please enter a description for this experiment: ")
+        with open(self.path / "description.txt", "w") as f:
+            f.write(description)
+        if instruction is None:
+            # write instruction from input into file
+            instruction = input("Instruction: ")
+        self.language_instruction = str(instruction)
+        self.data["language_instruction"] = self.language_instruction
+        # get git commit id
+        os.system(f'git log --format="%H" -n 1 > {os.path.join(str(self.path), "git_id.txt")}')
+        # submodule git ids
+        os.system(f'git submodule status > {os.path.join(str(self.path), "git_id_submodules.txt")}')
+        # get git diff
+        os.system(f'git diff --submodule=diff > {os.path.join(str(self.path), "git_diff.txt")}')
+
+    def flush(self):
+        """writes data to disk"""
+        if len(self.data) == 0 or self.step_count == 0:
+            return
+        print(self.data.keys())
+        if self.record_numpy:
+            np.savez(self.path / self.FILE.format(self.episode_count), **self.data)
+
+        if self.gif:
+            # for key in ["side", "wrist", "bird_eye", "openvla_view"]:
+            for key in ["side", "right_side", "bird_eye", "left_side", "front"]:
+                if f"observation.frames.{key}.rgb" in self.data:
+                    imgs = []
+                    previous_timestamp = 0
+                    for idx in range(min(len(self.data[f"observation.frames.{key}.rgb"]), len(self.data["timestamp"]))):
+                        # skip images that have timestamps closer together than 0.5s
+                        img = self.data[f"observation.frames.{key}.rgb"][idx]
+                        if self.data["timestamp"][idx] - previous_timestamp < self.GIF_DURATION_S:
+                            continue
+                        previous_timestamp = self.data["timestamp"][idx]
+                        imgs.append(Image.fromarray(img))
+                    imgs[0].save(
+                        self.path / self.GIF.format(self.timestamp, self.episode_count, key),
+                        save_all=True,
+                        append_images=imgs[1:],
+                        duration=self.GIF_DURATION_S * 1000,
+                        loop=0,
+                    )
+
+        self.episode_count += 1
+        self.data = {}
+
+    def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
+        obs, reward, terminated, truncated, info = super().step(action)
+        # write obs and action into data
+        act_obs = {"action": action, "observation": obs}
+        flattened_act_obs = flatten(copy.deepcopy(act_obs), reducer="dot")
+        # add timestamp
+        flattened_act_obs["timestamp"] = datetime.now().timestamp()
+        self.data["language_instruction"] = self.language_instruction
+        for key, value in flattened_act_obs.items():
+            if key not in self.data:
+                self.data[key] = np.expand_dims(value, axis=0)
+            else:
+                self.data[key] = np.concatenate([self.data[key], np.expand_dims(value, axis=0)], axis=0)
+        self.step_count += 1
+        return obs, reward, terminated, truncated, info
+
+    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
+        self.flush()
+        self.step_count = 0
+        return super().reset(seed=seed, options=options)
+
+    def close(self):
+        self.flush()
+        return super().close()
+
+    @property
+    def logger_dir(self):
+        return self.path
+
+    def log_files(self, file2content: dict[str, str]):
+        for fn, content in file2content.items():
+            with open(self.path / fn, "w") as f:
+                f.write(content)
+
+
+def listdict2dictlist(LD):
+    return {k: [dic[k] for dic in LD] for k in LD[0]}
+
+
+class RHCWrapper(gym.Wrapper):
+    """
+    Performs receding horizon control. The policy returns `pred_horizon` actions and
+    we execute `exec_horizon` of them.
+    """
+
+    def __init__(self, env: gym.Env, exec_horizon: int):
+        super().__init__(env)
+        self.exec_horizon = exec_horizon
+
+    def step(self, actions):
+        if self.exec_horizon == 1 and len(actions.shape) == 1:
+            actions = actions[None]
+        assert len(actions) >= self.exec_horizon
+        rewards = []
+        observations = []
+        infos = []
+
+        for i in range(self.exec_horizon):
+            # obs, reward, done, trunc, info = self.env.step(actions[i])
+            obs, reward, done, trunc, info = self.env.step({"xyzrpy": actions[i, :6], "gripper": actions[i, 6]})
+            observations.append(obs)
+            rewards.append(reward)
+            infos.append(info)
+
+            if done or trunc:
+                break
+
+        infos = listdict2dictlist(infos)
+        infos["rewards"] = rewards
+        infos["observations"] = observations
+
+        return obs, np.sum(rewards), done, trunc, infos
\ No newline at end of file

From 5762b92384d35b72e228df04a64cbd7b6ffe7b9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 14:08:35 +0100
Subject: [PATCH 08/56] feat: added webcam live viewer and vla executer

---
 python/examples/liveviewer.py   |  59 +++++++++
 python/rcsss/control/run_vla.py | 209 ++++++++++++++++++++++++++++++++
 2 files changed, 268 insertions(+)
 create mode 100644 python/examples/liveviewer.py
 create mode 100644 python/rcsss/control/run_vla.py

diff --git a/python/examples/liveviewer.py b/python/examples/liveviewer.py
new file mode 100644
index 00000000..f92447d2
--- /dev/null
+++ b/python/examples/liveviewer.py
@@ -0,0 +1,59 @@
+import argparse
+import cv2
+from matplotlib import pyplot as plt
+from rcsss.camera.interface import BaseCameraConfig
+from rcsss.camera.realsense import RealSenseCameraSet, RealSenseSetConfig
+
+
+if __name__ == "__main__":
+    # arg parse to get the key for the camera
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--camera", type=str, default="side")
+    args = parser.parse_args()
+
+    cameras = {
+        "side2": BaseCameraConfig(identifier="244222071045"), # old wrist
+        # "wrist": BaseCameraConfig(identifier="218622278131"), # new realsense
+        "bird_eye": BaseCameraConfig(identifier="243522070364"), # bird eye
+        # "side": BaseCameraConfig(identifier="243522070385"),
+        "side": BaseCameraConfig(identifier="243522070385"), # side
+    }
+
+    cam_cfg = RealSenseSetConfig(
+        cameras=cameras,
+        resolution_width=1280,
+        resolution_height=720,
+        frame_rate=15,
+        enable_imu=False,  # does not work with imu, why?
+        enable_ir=True,
+        enable_ir_emitter=False,
+    )
+
+    camera_set = RealSenseCameraSet(cam_cfg)
+    frame_set = camera_set.poll_frame_set()
+
+    # data = cv2.rotate(frame_set.frames[args.camera].camera.color.data, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    data = frame_set.frames[args.camera].camera.color.data
+    img = plt.imshow(data)
+    plt.show(block=False)
+    plt.pause(0.1)
+
+    # while True:
+    for i in range(300):
+        frame_set = camera_set.poll_frame_set()
+
+        # # save the images in the frameset
+        # for key, frame in frame_set.frames.items():
+        #     cv2.imwrite(f"{key}.png", frame.camera.color.data)
+        # break
+
+        # img.set_data(frame_set.frames[args.camera].camera.color.data)
+        # # data = cv2.rotate(frame_set.frames[args.camera].camera.color.data, cv2.ROTATE_90_COUNTERCLOCKWISE)
+        # plt.draw()
+        # plt.pause(0.5)
+
+    # save the images in the frameset
+    for key, frame in frame_set.frames.items():
+        # cv2.imwrite(f"{key}.png", frame.camera.color.data)
+        # save in rgb
+        cv2.imwrite(f"{key}.png", cv2.cvtColor(frame.camera.color.data, cv2.COLOR_BGR2RGB))
diff --git a/python/rcsss/control/run_vla.py b/python/rcsss/control/run_vla.py
new file mode 100644
index 00000000..22ac6a38
--- /dev/null
+++ b/python/rcsss/control/run_vla.py
@@ -0,0 +1,209 @@
+from dataclasses import dataclass
+import logging
+from time import sleep
+from PIL import Image
+import cv2
+import gymnasium as gym
+import numpy as np
+import matplotlib.pyplot as plt
+from rcsss.envs.wrappers import RHCWrapper, StorageWrapper
+from omegaconf import OmegaConf
+from rcsss._core.sim import FR3, FR3State
+from rcsss.camera.realsense import RealSenseCameraSet
+from rcsss.config import read_config_yaml
+
+# from rcsss.desk import FCI, Desk
+from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
+from rcsss.control.utils import load_creds_fr3_desk
+from rcsss.envs.base import ControlMode, RelativeTo, RobotInstance
+from rcsss.envs.factories import default_fr3_hw_gripper_cfg, default_fr3_hw_robot_cfg, default_fr3_sim_gripper_cfg, default_fr3_sim_robot_cfg, default_mujoco_cameraset_cfg, default_realsense, fr3_hw_env, fr3_sim_env
+import requests
+import json_numpy
+
+from agents.policies import Act, Obs
+from agents.client import RemoteAgent
+from rcsss.envs.base import ControlMode
+
+json_numpy.patch()
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+ROBOT_INSTANCE = RobotInstance.HARDWARE
+# CAMERA_GUI = "rgb_side"
+# INSTRUCTION = "pick up the blue cube"
+INSTRUCTION = "pick up the can"
+
+default_cfg = OmegaConf.create(
+    {"host": "dep-eng-air-p-1.hosts.utn.de", "port": 9000, "model": "openvla", "robot_ip": "192.168.101.1", "debug": True}
+    # {"host": "dep-eng-air-p-1.hosts.utn.de", "port": 7000, "model": "octo", "robot_ip": "192.168.101.1", "debug": False}
+    # {"host": "localhost", "port": 8080, "model": "chatgpt", "robot_ip": "192.168.101.1", "debug": False}
+)
+cli_conf = OmegaConf.from_cli()
+cfg = OmegaConf.merge(cli_conf, default_cfg)
+DEBUG = cfg.debug
+
+
+class RobotControl:
+    def __init__(self, env: gym.Env, instruction: str, model_host: str, model_port: int, model_name: str):
+        self.env = env
+        self.gripper_state = 1
+
+        self.instruction = instruction
+        self.remote_agent = RemoteAgent(model_host, model_port, model_name)
+
+        if not DEBUG:
+            self.env.get_wrapper_attr("log_files")(self.remote_agent.git_status())
+
+    def get_obs(self, obs: dict) -> Obs:
+        # bird_eye=obs["frames"]["bird_eye"]["rgb"]
+        # wrist=obs["frames"]["wrist"]["rgb"]
+
+        side = obs["frames"]["side"]["rgb"]
+        # side = obs["frames"]["default_free"]["rgb"]
+        # side = obs["frames"]["openvla_view"]["rgb"]
+
+        side = np.array(Image.fromarray(side).resize(
+                            (256, 256), Image.Resampling.LANCZOS))
+
+
+        
+
+        # center crop square
+        # h = side.shape[0]
+        # start_w = (side.shape[1] - h) // 2
+        # end_w = start_w + h
+        # side = side[:, start_w:end_w]
+        # bird_eye = bird_eye[:, start_w:end_w]
+        # wrist = wrist[:, start_w:end_w]
+
+        # rotate side by 90 degrees
+        # side = cv2.rotate(side, cv2.ROTATE_90_COUNTERCLOCKWISE)
+
+        # rescale side to 256x256
+        # side = cv2.resize(side, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
+        # bird_eye = cv2.resize(bird_eye, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
+        # wrist = cv2.resize(wrist, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
+
+        # TODO: add some noise, use other cameras, record what is happening
+
+        # return {"bird_eye": bird_eye, "wrist": wrist, "side": side, "gripper": obs["gripper"]}
+        # return Obs(rgb_bird_eye=bird_eye, rgb_wrist=wrist, rgb_side=side, gripper=obs["gripper"], rgb=obs["frames"])
+        return Obs(rgb_side=side, gripper=obs["gripper"]) #, info={"rgb": obs["frames"], "xyzrpy": obs["xyzrpy"]})
+
+    def step(self, action) -> tuple[bool, list[str], dict]:
+        # TODO Check if the model indicates when an action is finished.
+        logger.info("Executing command")
+        # only translation
+        # action[3:6] = [0, 0, 0]
+        # obs, _, _, truncated, info = self.env.step({"xyzrpy": action[:6], "gripper": action[6]})
+        print("action", action)
+        obs, _, _, truncated, info = self.env.step(action)
+        del info["observations"]
+        print("info", info)
+        # TODO: ik success from info
+        return False, obs, info
+
+    def loop(self):
+        # Initialize the environment and obtain the initial observation
+        obs, _ = self.env.reset()
+        info = {}
+
+        logger.info("Initializing")
+        obs_dict = self.get_obs(obs)
+
+        # img = plt.imshow(obs_dict[CAMERA_GUI])
+        # plt.show(block=False)
+        # plt.pause(0.1)
+        self.remote_agent.reset(obs_dict, instruction=self.instruction)
+
+        logger.info("Starting control loop")
+        while True:
+            logger.info("Getting action")
+            obs_dict.info = info
+            action = self.remote_agent.act(obs_dict)
+            if action.done:
+                logger.info("done issues by agent, shutting down")
+                break
+            logger.info(f"action issued by agent: {action.action}")
+
+            done, obs, info = self.step(action.action)
+            obs_dict = self.get_obs(obs)
+            # img.set_data(obs_dict.rgb_side)
+            # plt.draw()
+            # plt.pause(0.5)
+
+            if done:
+                break
+
+
+def main():
+    if ROBOT_INSTANCE == RobotInstance.HARDWARE:
+        user, pw = load_creds_fr3_desk()
+        resource_manger = FCI(Desk(cfg.robot_ip, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True)
+    else:
+        resource_manger = DummyResourceManager()
+    with resource_manger:
+
+        if ROBOT_INSTANCE == RobotInstance.HARDWARE:
+            camera_dict = {
+                # "wrist": "244222071045",
+                # "bird_eye": "243522070364",
+                # "side": "243522070385",
+                "side": "244222071045",
+            }
+
+            camera_set = default_realsense(camera_dict)
+            # env = utils.fr3_hw_env(
+            #     ip=cfg.robot_ip,
+            #     camera_set = camera_set,
+            #     control_mode=ControlMode.CARTESIAN_TRPY,
+            #     collision_guard=None,
+            #     gripper=True,
+            #     max_relative_movement=(0.1, np.deg2rad(5)),
+            #     asynchronous=True,
+            # relative_to=RelativeTo.LAST_STEP,
+            # )
+            env = fr3_hw_env(
+                ip=cfg.robot_ip,
+                camera_set = camera_set,
+                robot_cfg=default_fr3_hw_robot_cfg(),
+                control_mode=ControlMode.CARTESIAN_TRPY,
+                collision_guard=None,
+                gripper_cfg=default_fr3_hw_gripper_cfg(),
+                max_relative_movement=(0.1, np.deg2rad(5)),
+            relative_to=RelativeTo.LAST_STEP,
+            )
+        else:
+            env = fr3_sim_env(
+                control_mode=ControlMode.CARTESIAN_TRPY,
+                robot_cfg=default_fr3_sim_robot_cfg(),
+                gripper_cfg=default_fr3_sim_gripper_cfg(),
+                camera_set_cfg=default_mujoco_cameraset_cfg(),
+                max_relative_movement=(0.5, np.deg2rad(0)),
+                # mjcf="/home/juelg/code/frankcsy/robot-control-stack/build/_deps/scenes-src/scenes/fr3_empty_world/scene.xml",
+                mjcf="/home/juelg/code/frankcsy/models/scenes/lab/scene.xml",
+                relative_to=RelativeTo.LAST_STEP,
+            )
+            env.get_wrapper_attr("sim").open_gui()
+        if not DEBUG:
+            env = StorageWrapper(env, path="octo_realv1_async", instruction=INSTRUCTION, record_numpy=False)
+
+            # record videos
+            video_path = env.path / "videos"
+            video_path.mkdir(parents=True, exist_ok=True)
+            camera_set.record_video(video_path, 0)
+
+        env = RHCWrapper(env, exec_horizon=1)
+        controller = RobotControl(env, INSTRUCTION, cfg.host, cfg.port, cfg.model)
+        # controller = RobotControl(env, "Pick up the Yellow Brick from the Top", cfg.host, cfg.port, cfg.model)
+        # controller = RobotControl(env, "Move Yellow Box to the Right", cfg.host, cfg.port, cfg.model)
+        input("press enter to start")
+        with env:
+            controller.loop()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 3832b41cd30629e21d63498a1b5388e2c8092e62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 14:20:51 +0100
Subject: [PATCH 09/56] feat: added async for teleop

---
 python/rcsss/control/vive.py   |  3 ++-
 python/rcsss/envs/base.py      | 34 ++++++++++++++++++++++------------
 python/rcsss/envs/factories.py |  3 ++-
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index f21433ae..74ae572a 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -262,7 +262,8 @@ def main():
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
                 max_relative_movement=(0.5, np.deg2rad(90)),
                 # max_relative_movement=np.deg2rad(90),
-            relative_to=RelativeTo.CONFIGURED_ORIGIN,
+                relative_to=RelativeTo.CONFIGURED_ORIGIN,
+                async_control=True,
             )
         else:
             env_rel = fr3_sim_env(
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index cd15f2d3..27cd1195 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -160,7 +160,7 @@ class FR3Env(gym.Env):
     y
     """
 
-    def __init__(self, robot: hw.FR3, control_mode: ControlMode):
+    def __init__(self, robot: hw.FR3, control_mode: ControlMode, async_control: bool = False):
         self.robot = robot
         self._control_mode_overrides = [control_mode]
         self.action_space: gym.spaces.Dict
@@ -179,6 +179,7 @@ def __init__(self, robot: hw.FR3, control_mode: ControlMode):
         self.trpy_key = get_space_keys(TRPYDictType)[0]
         self.tquart_key = get_space_keys(TQuartDictType)[0]
         self.prev_action: dict | None = None
+        self.async_control = async_control
 
     def get_unwrapped_control_mode(self, idx: int) -> ControlMode:
         """Returns the unwrapped control mode at a certain index. 0 is the base control mode, -1 the last."""
@@ -224,25 +225,34 @@ def step(self, action: CartOrJointContType) -> tuple[ArmObsType, float, bool, bo
             or not np.allclose(action_dict[self.joints_key], self.prev_action[self.joints_key], atol=1e-03, rtol=0)
         ):
             # cast is needed because typed dicts cannot be checked at runtime
-            # self.robot.set_joint_position(action_dict[self.joints_key])
-            self.robot.controller_set_joint_position(action_dict[self.joints_key])
+            if not self.async_control:
+                self.robot.set_joint_position(action_dict[self.joints_key])
+            else:
+                self.robot.controller_set_joint_position(action_dict[self.joints_key])
         elif self.get_base_control_mode() == ControlMode.CARTESIAN_TRPY and (
             self.prev_action is None
             or not np.allclose(action_dict[self.trpy_key], self.prev_action[self.trpy_key], atol=1e-03, rtol=0)
         ):
-            self.robot.set_cartesian_position(
-                common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
-            )
+            if not self.async_control:
+                self.robot.set_cartesian_position(
+                    common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
+                )
+            else:
+                self.robot.osc_set_cartesian_position(
+                    common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
+                )
         elif self.get_base_control_mode() == ControlMode.CARTESIAN_TQuart and (
             self.prev_action is None
             or not np.allclose(action_dict[self.tquart_key], self.prev_action[self.tquart_key], atol=1e-03, rtol=0)
         ):
-            # self.robot.set_cartesian_position(
-            #     common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
-            # )
-            self.robot.osc_set_cartesian_position(
-                common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
-            )
+            if not self.async_control:
+                self.robot.set_cartesian_position(
+                    common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
+                )
+            else:
+                self.robot.osc_set_cartesian_position(
+                    common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
+                )
         self.prev_action = copy.deepcopy(action_dict)
         return self.get_obs(), 0, False, False, {}
 
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index b5c85388..08e4e459 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -89,6 +89,7 @@ def fr3_hw_env(
     max_relative_movement: float | tuple[float, float] | None = None,
     relative_to: RelativeTo = RelativeTo.LAST_STEP,
     urdf_path: str | PathLike | None = None,
+    async_control: bool = False,
 ) -> gym.Env:
     """
     Creates a hardware environment for the FR3 robot.
@@ -117,7 +118,7 @@ def fr3_hw_env(
     robot = rcsss.hw.FR3(ip, ik)
     robot.set_parameters(robot_cfg)
 
-    env: gym.Env = FR3Env(robot, ControlMode.JOINTS if collision_guard is not None else control_mode)
+    env: gym.Env = FR3Env(robot, ControlMode.JOINTS if collision_guard is not None else control_mode, async_control=async_control)
 
     env = FR3HW(env)
     if gripper_cfg is not None:

From 626b644ed5417e9408051af3cc153cf4dee5cbd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 14:35:41 +0100
Subject: [PATCH 10/56] removed high collision values

---
 src/hw/FR3.cpp | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index a0125047..a2db360a 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -159,7 +159,7 @@ void FR3::controller_set_joint_position(
     const common::Vector7d &desired_q) {
 
   // from deoxys/config/osc-position-controller.yml
-  double traj_interpolation_time_fraction = 0.3; // in s
+  double traj_interpolation_time_fraction = 1.0; // in s
   // form deoxys/config/charmander.yml
   int policy_rate = 20;
   int traj_rate = 500;
@@ -190,7 +190,7 @@ void FR3::controller_set_joint_position(
 void FR3::osc_set_cartesian_position(
     const common::Pose &desired_pose_EE_in_base_frame) {
   // from deoxys/config/osc-position-controller.yml
-  double traj_interpolation_time_fraction = 0.3;
+  double traj_interpolation_time_fraction = 1.0;
   // form deoxys/config/charmander.yml
   int policy_rate = 20;
   int traj_rate = 500;
@@ -234,11 +234,11 @@ void FR3::osc() {
 
   this->controller_time = 0.0;
 
-  this->robot.setCollisionBehavior(
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+  // this->robot.setCollisionBehavior(
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
 
   // from bench mark
   // ([150.0, 150.0, 60.0], 250.0), // kp_translation, kp_rotation
@@ -294,9 +294,6 @@ void FR3::osc() {
     Eigen::Quaterniond desired_quat_EE_in_base_frame;
 
     common::Pose pose(robot_state.O_T_EE);
-    // auto pose = this->get_cartesian_position();
-    // from deoxys/config/osc-position-controller.yml
-    double traj_interpolation_time_fraction = 0.3;
     // form deoxys/config/charmander.yml
     int policy_rate = 20;
     int traj_rate = 500;
@@ -451,11 +448,11 @@ void FR3::osc() {
 void FR3::joint_controller() {
   franka::Model model = this->robot.loadModel();
   this->controller_time = 0.0;
-  this->robot.setCollisionBehavior(
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+  // this->robot.setCollisionBehavior(
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
 
 
   // deoxys/config/joint-impedance-controller.yml

From eed91771da4faf2ab4b15bb95368bed027150bb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 14:39:47 +0100
Subject: [PATCH 11/56] fix(vive): collision guard

---
 python/rcsss/control/vive.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 74ae572a..56e01162 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -254,8 +254,7 @@ def main():
             env_rel = fr3_hw_env(
                 ip=ROBOT_IP,
                 # camera_set = camera_set,
-                # collision_guard="models/scenes/lab/teleop_scene.xml",
-                collision_guard="models/scenes/lab/scene.xml",
+                # collision_guard="lab",
                 robot_cfg=default_fr3_hw_robot_cfg(),
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,

From e7c20bdbd8b96aa81088c8e1d246719e042e624b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 19:25:10 +0100
Subject: [PATCH 12/56] refactor(vive): more conservative lock usage

---
 python/rcsss/control/vive.py | 125 ++++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 59 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 56e01162..29cebc78 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -84,17 +84,18 @@ def __init__(
         self._step_env = False
 
     def next_action(self) -> Pose:
-        transform = Pose(
-            translation=self._last_controller_pose.translation() - self._offset_pose.translation(),
-            quaternion=(self._last_controller_pose * self._offset_pose.inverse()).rotation_q(),
-        )
-        return (
-            self._ego_transform
-            * UDPViveActionServer.transform_from_openxr
-            * transform
-            * UDPViveActionServer.transform_from_openxr.inverse()
-            * self._ego_transform.inverse()
-        )
+        with self._resource_lock:
+            transform = Pose(
+                translation=self._last_controller_pose.translation() - self._offset_pose.translation(),
+                quaternion=(self._last_controller_pose * self._offset_pose.inverse()).rotation_q(),
+            )
+            return (
+                self._ego_transform
+                * UDPViveActionServer.transform_from_openxr
+                * transform
+                * UDPViveActionServer.transform_from_openxr.inverse()
+                * self._ego_transform.inverse()
+            )
 
     def get_last_controller_pose(self) -> Pose:
         return self._last_controller_pose
@@ -116,60 +117,64 @@ def run(self):
                         logger.warning("[UDP server] socket timeout (0.1s), waiting for packets")
                         warning_raised = True
                     continue
-                with self._resource_lock:
-                    last_controller_pose_raw = np.ctypeslib.as_array(unpacked[:7])
-                    last_controller_pose = Pose(
-                        translation=last_controller_pose_raw[4:],
-                        quaternion=last_controller_pose_raw[:4] if INCLUDE_ROTATION else np.array([0, 0, 0, 1]),
-                    )
-                    if Button(int(unpacked[7])) & self._trg_btn and not Button(int(self._buttons)) & self._trg_btn:
-                        # trigger just pressed (first data sample with button pressed
-
-                        # set forward direction based on current controller pose
-                        if self._ego_lock:
-                            x_axis = Pose(translation=np.array([1, 0, 0]))
-                            x_axis_rot = (
-                                UDPViveActionServer.transform_from_openxr
-                                * Pose(quaternion=last_controller_pose.rotation_q())
-                                * UDPViveActionServer.transform_from_openxr.inverse()
-                                * x_axis
-                            )
-
-                            # Compute angle around z axis: https://stackoverflow.com/questions/21483999/using-atan2-to-find-angle-between-two-vectors
-                            rot_z = np.atan2(x_axis_rot.translation()[1], x_axis_rot.translation()[0]) - np.atan2(
-                                x_axis.translation()[1], x_axis.translation()[0]
-                            )
-                            rot_z -= np.pi / 2
-
-                            print(f"Angle: {rot_z*180/np.pi}")
+                last_controller_pose_raw = np.ctypeslib.as_array(unpacked[:7])
+                last_controller_pose = Pose(
+                    translation=last_controller_pose_raw[4:],
+                    quaternion=last_controller_pose_raw[:4] if INCLUDE_ROTATION else np.array([0, 0, 0, 1]),
+                )
+                if Button(int(unpacked[7])) & self._trg_btn and not Button(int(self._buttons)) & self._trg_btn:
+                    # trigger just pressed (first data sample with button pressed
+
+                    # set forward direction based on current controller pose
+                    if self._ego_lock:
+                        x_axis = Pose(translation=np.array([1, 0, 0]))
+                        x_axis_rot = (
+                            UDPViveActionServer.transform_from_openxr
+                            * Pose(quaternion=last_controller_pose.rotation_q())
+                            * UDPViveActionServer.transform_from_openxr.inverse()
+                            * x_axis
+                        )
+
+                        # Compute angle around z axis: https://stackoverflow.com/questions/21483999/using-atan2-to-find-angle-between-two-vectors
+                        rot_z = np.atan2(x_axis_rot.translation()[1], x_axis_rot.translation()[0]) - np.atan2(
+                            x_axis.translation()[1], x_axis.translation()[0]
+                        )
+                        rot_z -= np.pi / 2
+
+                        print(f"Angle: {rot_z*180/np.pi}")
+                        with self._resource_lock:
                             self._ego_transform = Pose(RPY(yaw=-rot_z))
-                        else:
+                    else:
+                        with self._resource_lock:
                             self._ego_transform = Pose()
 
+                    with self._resource_lock:
                         self._offset_pose = last_controller_pose
                         self._last_controller_pose = last_controller_pose
 
-                    elif not Button(int(unpacked[7])) & self._trg_btn and Button(int(self._buttons)) & self._trg_btn:
-                        # released
-                        with self._env_lock:
-                            self._last_controller_pose = Pose()
-                            self._offset_pose = Pose()
-                            self._ego_transform = Pose()
-                            self._env.set_origin_to_current()
-
-                    elif Button(int(unpacked[7])) & self._trg_btn:
-                        # button is pressed
+                elif not Button(int(unpacked[7])) & self._trg_btn and Button(int(self._buttons)) & self._trg_btn:
+                    # released
+                    with self._resource_lock:
+                        self._last_controller_pose = Pose()
+                        self._offset_pose = Pose()
+                        self._ego_transform = Pose()
+                    with self._env_lock:
+                        self._env.set_origin_to_current()
+
+                elif Button(int(unpacked[7])) & self._trg_btn:
+                    # button is pressed
+                    with self._resource_lock:
                         self._last_controller_pose = last_controller_pose
 
 
-                    if Button(int(unpacked[7])) & self._grp_btn and not Button(int(self._buttons)) & self._grp_btn:
-                        # just pressed
-                        self._grp_pos = 0
-                    elif not Button(int(unpacked[7])) & self._grp_btn and Button(int(self._buttons)) & self._grp_btn:
-                        # just released
-                        self._grp_pos = 1
+                if Button(int(unpacked[7])) & self._grp_btn and not Button(int(self._buttons)) & self._grp_btn:
+                    # just pressed
+                    self._grp_pos = 0
+                elif not Button(int(unpacked[7])) & self._grp_btn and Button(int(self._buttons)) & self._grp_btn:
+                    # just released
+                    self._grp_pos = 1
 
-                    self._buttons = unpacked[7]
+                self._buttons = unpacked[7]
 
     def stop(self):
         self._exit_requested = True
@@ -191,8 +196,7 @@ def environment_step_loop(self):
             if self._exit_requested:
                 self._step_env = False
                 break
-            with self._resource_lock:
-                displacement = self.next_action()
+            displacement = self.next_action()
             action = dict(
                 LimitedTQuartRelDictType(tquart=np.concatenate([displacement.translation(), displacement.rotation_q()]))
             )
@@ -200,8 +204,9 @@ def environment_step_loop(self):
             action.update(GripperDictType(gripper=self._grp_pos))
 
             with self._env_lock:
-                sleep(0.001)
                 self._env.step(action)
+            # rate limit
+            sleep(0.001)
 
 
 def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet):
@@ -211,6 +216,7 @@ def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSens
             case "help":
                 print("You can use `quit` to stop the program, `episode` to start a new episode")
             case "quit":
+                # camera_set.stop()
                 sys.exit(0)
             case "episode":
                 # camera_set.clear_buffer()
@@ -260,6 +266,7 @@ def main():
                 # control_mode=ControlMode.JOINTS,
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
                 max_relative_movement=(0.5, np.deg2rad(90)),
+                # TODO: max should be always according to the last step
                 # max_relative_movement=np.deg2rad(90),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
                 async_control=True,
@@ -270,7 +277,7 @@ def main():
                 # control_mode=ControlMode.JOINTS,
                 robot_cfg=default_fr3_sim_robot_cfg(),
                 collision_guard=False,
-                # mjcf="models/scenes/lab/teleop_scene.xml",
+                mjcf="lab",
                 gripper_cfg=default_fr3_sim_gripper_cfg(),
                 # camera_set_cfg=default_mujoco_cameraset_cfg(),
                 max_relative_movement=0.5,

From 120181f93d21210fbe48301fc845e20706d46170 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 19:28:55 +0100
Subject: [PATCH 13/56] refactor: video frame is written every step

---
 python/rcsss/camera/hw.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index 9dd0538c..6ebcab71 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -126,16 +126,14 @@ def polling_thread(self, warm_up: bool = True):
             frame_set = self.poll_frame_set()
             with self._buffer_lock:
                 self._buffer[self._next_ring_index] = frame_set
+                # copy the buffer to the record path
+                for camera_key, writer in self.writer.items():
+                    frameset = self._buffer[self._next_ring_index]
+                    assert frameset is not None
+                    writer.write(frameset.frames[camera_key].camera.color.data[:, :, ::-1])
                 self._next_ring_index = (self._next_ring_index + 1) % self.config.max_buffer_frames
                 self._buffer_len = max(self._buffer_len + 1, self.config.max_buffer_frames)
-                if self._next_ring_index == 0:
-                    # copy the buffer to the record path
-                    for camera_key, writer in self.writer.items():
-                        for i in range(self.config.max_buffer_frames):
-                            frameset = self._buffer[i]
-                            assert frameset is not None
-                            writer.write(frameset.frames[camera_key].camera.color.data[:, :, ::-1])
-            sleep(1 / self.config.frame_rate)
+            self.rate(self.config.frame_rate)
 
     def poll_frame_set(self) -> FrameSet:
         """Gather frames over all available cameras."""

From f3c325717c482e15d92e2ca31f135d59e8935b39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 25 Mar 2025 19:29:45 +0100
Subject: [PATCH 14/56] feat(camera): added rate limiter

---
 python/rcsss/camera/hw.py        |  4 +++-
 python/rcsss/camera/interface.py | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index 6ebcab71..0220056e 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -14,6 +14,7 @@
     BaseCameraSetConfig,
     Frame,
     FrameSet,
+    SimpleFrameRate,
 )
 
 
@@ -40,6 +41,7 @@ def __init__(self):
         self._next_ring_index = 0
         self._buffer_len = 0
         self.writer: dict[str, cv2.VideoWriter] = {}
+        self.rate = SimpleFrameRate()
 
     def buffer_size(self) -> int:
         return len(self._buffer) - self._buffer.count(None)
@@ -117,7 +119,7 @@ def warm_up(self):
         for _ in range(self.config.warm_up_disposal_frames):
             for camera_name in self.camera_names:
                 self._poll_frame(camera_name)
-            sleep(1 / self.config.frame_rate)
+            self.rate(self.config.frame_rate)
 
     def polling_thread(self, warm_up: bool = True):
         if warm_up:
diff --git a/python/rcsss/camera/interface.py b/python/rcsss/camera/interface.py
index f341ee69..4330b275 100644
--- a/python/rcsss/camera/interface.py
+++ b/python/rcsss/camera/interface.py
@@ -1,9 +1,27 @@
 from dataclasses import dataclass
 from datetime import datetime
+from time import time, sleep
 from typing import Any, Protocol
 
 from pydantic import BaseModel, Field
 
+class SimpleFrameRate:
+    def __init__(self):
+        self.t = None
+
+    def reset(self):
+        self.t = None
+
+    def __call__(self, frame_rate: int):
+        if self.t is None:
+            self.t = time()
+            sleep(1 / frame_rate)
+            return
+        sleep_time = 1 / frame_rate - (time() - self.t)
+        if sleep_time > 0:
+            sleep(sleep_time)
+        self.t = time()
+
 
 class BaseCameraConfig(BaseModel):
     identifier: str

From d624ab19434dde875f71e1568204895f5c3d61c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 07:09:57 +0100
Subject: [PATCH 15/56] fix(rel env): relative limiting for origin set

---
 python/rcsss/envs/base.py | 53 +++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 27cd1195..532435e6 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -348,6 +348,7 @@ def __init__(
         self.tquart_key = get_space_keys(LimitedTQuartRelDictType)[0]
         self.initial_obs: dict[str, Any] | None = None
         self._origin: common.Pose | Vec7Type | None = None
+        self._last_action: common.Pose | Vec7Type | None = None
 
     def set_origin(self, origin: common.Pose | Vec7Type):
         if self.unwrapped.get_control_mode() == ControlMode.JOINTS:
@@ -371,11 +372,13 @@ def reset(self, **kwargs) -> tuple[dict, dict[str, Any]]:
         obs, info = super().reset(**kwargs)
         self.initial_obs = obs
         self.set_origin_to_current()
+        self._last_action = None
         return obs, info
 
     def action(self, action: dict[str, Any]) -> dict[str, Any]:
         if self.relative_to == RelativeTo.LAST_STEP:
             # TODO: should we use the last observation instead?
+            # -> could be done after the step to the state that is returned by the observation
             self.set_origin_to_current()
         action = copy.deepcopy(action)
         if self.unwrapped.get_control_mode() == ControlMode.JOINTS and self.joints_key in action:
@@ -383,7 +386,13 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             assert isinstance(self.max_mov, float)
             joint_space = cast(gym.spaces.Box, get_space(JointsDictType).spaces[self.joints_key])
             # TODO: should we also clip euqally for all joints?
-            limited_joints = np.clip(action[self.joints_key], -self.max_mov, self.max_mov)
+            if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
+                limited_joints = np.clip(action[self.joints_key], -self.max_mov, self.max_mov)
+            else:
+                joints_diff = action[self.joints_key] - self._last_action
+                limited_joints_diff = np.clip(joints_diff, -self.max_mov, self.max_mov)
+                limited_joints = limited_joints_diff + self._last_action
+                self._last_action = limited_joints
             action.update(
                 JointsDictType(joints=np.clip(self._origin + limited_joints, joint_space.low, joint_space.high))
             )
@@ -393,14 +402,25 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             assert isinstance(self.max_mov, tuple)
             pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key])
 
-            clipped_pose_offset = (
-                common.Pose(
+            if self.relative_to == RelativeTo.LAST_STEP:
+                clipped_pose_offset = (
+                    common.Pose(
+                        translation=action[self.trpy_key][:3],
+                        rpy_vector=action[self.trpy_key][3:],
+                    )
+                    .limit_translation_length(self.max_mov[0])
+                    .limit_rotation_angle(self.max_mov[1])
+                )
+            else:
+                pose_diff = common.Pose(
                     translation=action[self.trpy_key][:3],
                     rpy_vector=action[self.trpy_key][3:],
+                ) * self._origin.inverse()
+                clipped_pose_diff = (
+                    pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
                 )
-                .limit_translation_length(self.max_mov[0])
-                .limit_rotation_angle(self.max_mov[1])
-            )
+                clipped_pose_offset = clipped_pose_diff * self._origin
+                self._last_action = clipped_pose_offset
 
             unclipped_pose = common.Pose(
                 translation=self._origin.translation() + clipped_pose_offset.translation(),
@@ -421,14 +441,25 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             assert isinstance(self.max_mov, tuple)
             pose_space = cast(gym.spaces.Box, get_space(TQuartDictType).spaces[self.tquart_key])
 
-            clipped_pose_offset = (
-                common.Pose(
+            if self.relative_to == RelativeTo.LAST_STEP:
+                clipped_pose_offset = (
+                    common.Pose(
+                        translation=action[self.tquart_key][:3],
+                        quaternion=action[self.tquart_key][3:],
+                    )
+                    .limit_translation_length(self.max_mov[0])
+                    .limit_rotation_angle(self.max_mov[1])
+                )
+            else:
+                pose_diff = common.Pose(
                     translation=action[self.tquart_key][:3],
                     quaternion=action[self.tquart_key][3:],
+                ) * self._origin.inverse()
+                clipped_pose_diff = (
+                    pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
                 )
-                .limit_translation_length(self.max_mov[0])
-                .limit_rotation_angle(self.max_mov[1])
-            )
+                clipped_pose_offset = clipped_pose_diff * self._origin # not sure if the order is correct
+                self._last_action = clipped_pose_offset
 
             unclipped_pose = common.Pose(
                 translation=self._origin.translation() + clipped_pose_offset.translation(),

From 9ae92efa880a7b6c1941c99c0f2b056e47ff8f6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 08:02:02 +0100
Subject: [PATCH 16/56] refactor(hw camera): video recording in storage wrapper

---
 python/rcsss/camera/hw.py     |  3 +++
 python/rcsss/envs/wrappers.py | 24 +++++++++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index 0220056e..fa73fec2 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -103,6 +103,9 @@ def record_video(self, path: Path, episode: int):
                 (self.config.resolution_width, self.config.resolution_height),
             )
 
+    def recording_ongoing(self) -> bool:
+        return len(self.writer) > 0
+
     def stop_video(self):
         if len(self.writer) > 0:
             for camera_key, writer in self.writer.items():
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 6ae92dbb..4b357d4a 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -12,6 +12,8 @@
 import numpy as np
 from flatten_dict import flatten
 from PIL import Image
+from rcsss.camera.hw import BaseHardwareCameraSet
+
 
 class StorageWrapper(gym.Wrapper):
     # TODO: this should also record the instruction
@@ -20,7 +22,16 @@ class StorageWrapper(gym.Wrapper):
     FOLDER = "experiment_{}"
     GIF_DURATION_S = 0.5
 
-    def __init__(self, env: gym.Env, path: str, instruction: str | None = None, description: str | None = None, record_numpy: bool = True, gif: bool = True):
+    def __init__(
+        self,
+        env: gym.Env,
+        path: str,
+        instruction: str | None = None,
+        description: str | None = None,
+        record_numpy: bool = True,
+        gif: bool = True,
+        camera_set: BaseHardwareCameraSet | None = None,
+    ):
         super().__init__(env)
         self.episode_count = 0
         self.step_count = 0
@@ -28,6 +39,7 @@ def __init__(self, env: gym.Env, path: str, instruction: str | None = None, desc
         self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
         self.record_numpy = record_numpy
         self.gif = gif
+        self.camera_set = camera_set
 
         # make folders
         self.path = Path(path) / self.FOLDER.format(self.timestamp)
@@ -56,6 +68,8 @@ def flush(self):
         print(self.data.keys())
         if self.record_numpy:
             np.savez(self.path / self.FILE.format(self.episode_count), **self.data)
+        if self.camera_set is not None and self.camera_set.recording_ongoing():
+            self.camera_set.stop_video()
 
         if self.gif:
             # for key in ["side", "wrist", "bird_eye", "openvla_view"]:
@@ -81,6 +95,7 @@ def flush(self):
         self.episode_count += 1
         self.data = {}
 
+    # TODO: fix recorder order
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
         # write obs and action into data
@@ -100,7 +115,10 @@ def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str,
     def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
         self.flush()
         self.step_count = 0
-        return super().reset(seed=seed, options=options)
+        re = super().reset(seed=seed, options=options)
+        if self.camera_set is not None:
+            self.camera_set.record_video(self.path, self.episode_count)
+        return re
 
     def close(self):
         self.flush()
@@ -152,4 +170,4 @@ def step(self, actions):
         infos["rewards"] = rewards
         infos["observations"] = observations
 
-        return obs, np.sum(rewards), done, trunc, infos
\ No newline at end of file
+        return obs, np.sum(rewards), done, trunc, infos

From 47c8a6cf96f686cf330dcb7ee8fc13ecacb14dc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 08:02:21 +0100
Subject: [PATCH 17/56] adapted relative limits for teleop

---
 python/rcsss/control/vive.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 29cebc78..18c8b88a 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -224,7 +224,6 @@ def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSens
                 video_path = env_rel.path / "videos"
                 video_path.mkdir(parents=True, exist_ok=True)
                 print(f'{env_rel.episode_count = }')
-                # camera_set.record_video(video_path, env_rel.episode_count)
 
                 thread = threading.Thread(target=action_server.environment_step_loop)
                 thread.start()
@@ -232,8 +231,6 @@ def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSens
                 print("stopping")
                 action_server.stop_env_loop()
                 thread.join()
-                # save
-                # camera_set.stop_video()
                 env_rel.reset()
                 print("videos saved!")
 
@@ -265,9 +262,9 @@ def main():
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
-                max_relative_movement=(0.5, np.deg2rad(90)),
+                max_relative_movement=(0.05, np.deg2rad(10)),
                 # TODO: max should be always according to the last step
-                # max_relative_movement=np.deg2rad(90),
+                # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
                 async_control=True,
             )
@@ -286,7 +283,7 @@ def main():
             env_rel.get_wrapper_attr("sim").open_gui()
 
         if not DEBUG:
-            env_rel = StorageWrapper(env_rel, path="/home/juelg/code/frankcsy/record_real_christmas")
+            env_rel = StorageWrapper(env_rel, path="/home/juelg/code/frankcsy/record_real_christmas", camera_set=camera_set)
             # ip_secondary = "192.168.102.1"
             # with Desk.fci(ip_secondary, user, pw):
             #     f = rcsss.hw.FR3(ip_secondary)

From c4ef5f8018210000504a7004bc715b7fcedd390f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 16:57:48 +0100
Subject: [PATCH 18/56] fix(hw): joint controller torque limit

---
 src/hw/FR3.cpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index a2db360a..42046064 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -525,7 +525,20 @@ void FR3::joint_controller() {
     std::array<double, 7> tau_d_array{};
     Eigen::VectorXd::Map(&tau_d_array[0], 7) = tau_d;
 
-    return tau_d_array;
+    // end of controller
+    std::chrono::high_resolution_clock::time_point t2 =
+        std::chrono::high_resolution_clock::now();
+    auto time = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
+
+    std::array<double, 7> tau_d_rate_limited = franka::limitRate(
+        franka::kMaxTorqueRate, tau_d_array, robot_state.tau_J_d);
+
+    // deoxys/config/control_config.yml
+    double min_torque = -5;
+    double max_torque = 5;
+    TorqueSafetyGuardFn(tau_d_rate_limited, min_torque, max_torque);
+
+    return tau_d_rate_limited;
   });
 }
 

From f974beaf8ff5689d26a2a4a13630a416f47b8b37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 16:59:21 +0100
Subject: [PATCH 19/56] fix: relative move move high freqency filter

---
 python/rcsss/envs/base.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 532435e6..9126e73b 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -388,6 +388,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             # TODO: should we also clip euqally for all joints?
             if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
                 limited_joints = np.clip(action[self.joints_key], -self.max_mov, self.max_mov)
+                self._last_action = limited_joints
             else:
                 joints_diff = action[self.joints_key] - self._last_action
                 limited_joints_diff = np.clip(joints_diff, -self.max_mov, self.max_mov)
@@ -402,7 +403,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             assert isinstance(self.max_mov, tuple)
             pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key])
 
-            if self.relative_to == RelativeTo.LAST_STEP:
+            if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
                 clipped_pose_offset = (
                     common.Pose(
                         translation=action[self.trpy_key][:3],
@@ -411,15 +412,16 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                     .limit_translation_length(self.max_mov[0])
                     .limit_rotation_angle(self.max_mov[1])
                 )
+                self._last_action = clipped_pose_offset
             else:
                 pose_diff = common.Pose(
                     translation=action[self.trpy_key][:3],
                     rpy_vector=action[self.trpy_key][3:],
-                ) * self._origin.inverse()
+                ) * self._last_action.inverse()
                 clipped_pose_diff = (
                     pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
                 )
-                clipped_pose_offset = clipped_pose_diff * self._origin
+                clipped_pose_offset = clipped_pose_diff * self._last_action
                 self._last_action = clipped_pose_offset
 
             unclipped_pose = common.Pose(
@@ -441,7 +443,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             assert isinstance(self.max_mov, tuple)
             pose_space = cast(gym.spaces.Box, get_space(TQuartDictType).spaces[self.tquart_key])
 
-            if self.relative_to == RelativeTo.LAST_STEP:
+            if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
                 clipped_pose_offset = (
                     common.Pose(
                         translation=action[self.tquart_key][:3],
@@ -450,15 +452,16 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                     .limit_translation_length(self.max_mov[0])
                     .limit_rotation_angle(self.max_mov[1])
                 )
+                self._last_action = clipped_pose_offset
             else:
                 pose_diff = common.Pose(
                     translation=action[self.tquart_key][:3],
                     quaternion=action[self.tquart_key][3:],
-                ) * self._origin.inverse()
+                ) * self._last_action.inverse()
                 clipped_pose_diff = (
                     pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
                 )
-                clipped_pose_offset = clipped_pose_diff * self._origin # not sure if the order is correct
+                clipped_pose_offset = clipped_pose_diff * self._last_action
                 self._last_action = clipped_pose_offset
 
             unclipped_pose = common.Pose(

From bee976473f9b5981b992f55339b4bf5eefdd78d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 21:58:35 +0100
Subject: [PATCH 20/56] fix: async reset

---
 python/rcsss/control/vive.py | 10 +++++-----
 python/rcsss/envs/base.py    |  3 +++
 src/hw/FR3.cpp               |  1 -
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 18c8b88a..9520a7e8 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -249,20 +249,20 @@ def main():
                 "wrist": "244222071045",
                 # "wrist": "218622278131", # new realsense
                 "bird_eye": "243522070364",
-                # "side": "243522070385",
-                "side": "244222071045",
+                "side": "243522070385",
             }
-            # camera_set = default_realsense(camera_dict)
+            camera_set = default_realsense(camera_dict)
             camera_set = None
             env_rel = fr3_hw_env(
                 ip=ROBOT_IP,
-                # camera_set = camera_set,
+                camera_set = camera_set,
                 # collision_guard="lab",
                 robot_cfg=default_fr3_hw_robot_cfg(),
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
-                max_relative_movement=(0.05, np.deg2rad(10)),
+                max_relative_movement=(0.01, np.deg2rad(5)),
+                # max_relative_movement=(0.5, np.deg2rad(90)),
                 # TODO: max should be always according to the last step
                 # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 9126e73b..a8c2c38d 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -265,7 +265,10 @@ def reset(
         if options is not None:
             msg = "options not implemented yet"
             raise NotImplementedError(msg)
+        if self.async_control:
+            self.robot.stop_control_thread()
         self.robot.reset()
+        self.robot.move_home()
         return self.get_obs(), {}
 
     def close(self):
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index 42046064..f625c20a 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -569,7 +569,6 @@ void FR3::automatic_error_recovery() { this->robot.automaticErrorRecovery(); }
 
 void FR3::reset() {
   this->automatic_error_recovery();
-  this->move_home();
 }
 
 void FR3::wait_milliseconds(int milliseconds) {

From 5ddff27c706c95e22f619fd10ab865e956fb9ea7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 22:04:52 +0100
Subject: [PATCH 21/56] refactor(hw): moved stop async control thread to reset

---
 python/rcsss/envs/base.py | 2 --
 src/hw/FR3.cpp            | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index a8c2c38d..e70da752 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -265,8 +265,6 @@ def reset(
         if options is not None:
             msg = "options not implemented yet"
             raise NotImplementedError(msg)
-        if self.async_control:
-            self.robot.stop_control_thread()
         self.robot.reset()
         self.robot.move_home()
         return self.get_obs(), {}
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index f625c20a..c038ab1b 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -568,6 +568,7 @@ void FR3::move_home() { this->set_joint_position(q_home); }
 void FR3::automatic_error_recovery() { this->robot.automaticErrorRecovery(); }
 
 void FR3::reset() {
+  this->stop_control_thread();
   this->automatic_error_recovery();
 }
 

From 3649a8570d6820c6d3d09dc4ed17a48257ad4a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 22:16:07 +0100
Subject: [PATCH 22/56] fix: lock usage in camera

---
 python/rcsss/camera/hw.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index fa73fec2..3aaa6481 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -94,6 +94,9 @@ def start(self, warm_up: bool = True):
         self._thread.start()
 
     def record_video(self, path: Path, episode: int):
+        if self.recording_ongoing():
+            return
+        self.clear_buffer()
         for camera in self.camera_names:
             self.writer[camera] = cv2.VideoWriter(
                 str(path / f"episode_{episode}_{camera}.mp4"),
@@ -108,15 +111,16 @@ def recording_ongoing(self) -> bool:
 
     def stop_video(self):
         if len(self.writer) > 0:
-            for camera_key, writer in self.writer.items():
-                for i in range(self._next_ring_index):
-                    frameset = self._buffer[i]
-                    assert frameset is not None
-                    # rgb to bgr as expected by opencv
-                    writer.write(frameset.frames[camera_key].camera.color.data[:, :, ::-1])
-            for camera in self.camera_names:
-                self.writer[camera].release()
-            self.writer = {}
+            with self._buffer_lock:
+                for camera_key, writer in self.writer.items():
+                    for i in range(self._next_ring_index):
+                        frameset = self._buffer[i]
+                        assert frameset is not None
+                        # rgb to bgr as expected by opencv
+                        writer.write(frameset.frames[camera_key].camera.color.data[:, :, ::-1])
+                for camera in self.camera_names:
+                    self.writer[camera].release()
+                self.writer = {}
 
     def warm_up(self):
         for _ in range(self.config.warm_up_disposal_frames):

From 162dd75645022e048f03d834c97087a6c627e5a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 22:25:10 +0100
Subject: [PATCH 23/56] refactor: proper rate limiter in vive

---
 python/rcsss/camera/interface.py | 9 ++++++---
 python/rcsss/control/vive.py     | 5 +++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/rcsss/camera/interface.py b/python/rcsss/camera/interface.py
index 4330b275..93602580 100644
--- a/python/rcsss/camera/interface.py
+++ b/python/rcsss/camera/interface.py
@@ -5,6 +5,7 @@
 
 from pydantic import BaseModel, Field
 
+
 class SimpleFrameRate:
     def __init__(self):
         self.t = None
@@ -12,12 +13,14 @@ def __init__(self):
     def reset(self):
         self.t = None
 
-    def __call__(self, frame_rate: int):
+    def __call__(self, frame_rate: int | float):
         if self.t is None:
             self.t = time()
-            sleep(1 / frame_rate)
+            sleep(1 / frame_rate if isinstance(frame_rate, int) else frame_rate)
             return
-        sleep_time = 1 / frame_rate - (time() - self.t)
+        sleep_time = (
+            1 / frame_rate - (time() - self.t) if isinstance(frame_rate, int) else frame_rate - (time() - self.t)
+        )
         if sleep_time > 0:
             sleep(sleep_time)
         self.t = time()
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 9520a7e8..d2a93019 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -5,6 +5,7 @@
 from socket import AF_INET, SOCK_DGRAM, socket
 from struct import unpack
 from time import sleep
+from rcsss.camera.interface import SimpleFrameRate
 from rcsss.camera.realsense import RealSenseCameraSet
 
 import numpy as np
@@ -191,6 +192,7 @@ def stop_env_loop(self):
         self._step_env = False
 
     def environment_step_loop(self):
+        rate_limiter = SimpleFrameRate()
         self._step_env = True
         while self._step_env:
             if self._exit_requested:
@@ -205,8 +207,7 @@ def environment_step_loop(self):
 
             with self._env_lock:
                 self._env.step(action)
-            # rate limit
-            sleep(0.001)
+            rate_limiter(0.001)
 
 
 def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet):

From 045a61cbae1a67945fdc1079bec9a6c09dd24bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 26 Mar 2025 22:59:37 +0100
Subject: [PATCH 24/56] improved camera set handling and vive script

- vive script more configuration in header: storage path, camera set and gif
- proper start and end of video recording
- gif folder
---
 python/rcsss/control/vive.py  | 37 ++++++++++++++++++-----------------
 python/rcsss/envs/wrappers.py |  8 +++++---
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index d2a93019..ee79caad 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -42,7 +42,17 @@
 INCLUDE_ROTATION = True
 ROBOT_IP = "192.168.101.1"
 ROBOT_INSTANCE = RobotInstance.HARDWARE
-DEBUG = True
+DEBUG = False
+STORAGE_PATH = "teleop_data"
+# set camera dict to none disable cameras
+CAMERA_DICT = {
+    "wrist": "244222071045",
+    # "wrist": "218622278131", # new realsense
+    "bird_eye": "243522070364",
+    "side": "243522070385",
+}
+PRODUCE_GIF = True
+
 
 
 class Button(IntFlag):
@@ -210,20 +220,20 @@ def environment_step_loop(self):
             rate_limiter(0.001)
 
 
-def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet):
+def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet | None):
     while True:
         i = input("> ")
         match i:
             case "help":
                 print("You can use `quit` to stop the program, `episode` to start a new episode")
             case "quit":
-                # camera_set.stop()
                 sys.exit(0)
             case "episode":
-                # camera_set.clear_buffer()
                 # record videos
-                video_path = env_rel.path / "videos"
-                video_path.mkdir(parents=True, exist_ok=True)
+                if camera_set is not None:
+                    video_path = env_rel.path / "videos"
+                    video_path.mkdir(parents=True, exist_ok=True)
+                    camera_set.record_video(env_rel.path / "videos", env_rel.episode_count)
                 print(f'{env_rel.episode_count = }')
 
                 thread = threading.Thread(target=action_server.environment_step_loop)
@@ -246,14 +256,7 @@ def main():
     with resource_manger:
 
         if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-            camera_dict = {
-                "wrist": "244222071045",
-                # "wrist": "218622278131", # new realsense
-                "bird_eye": "243522070364",
-                "side": "243522070385",
-            }
-            camera_set = default_realsense(camera_dict)
-            camera_set = None
+            camera_set = default_realsense(CAMERA_DICT)
             env_rel = fr3_hw_env(
                 ip=ROBOT_IP,
                 camera_set = camera_set,
@@ -264,7 +267,6 @@ def main():
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
                 max_relative_movement=(0.01, np.deg2rad(5)),
                 # max_relative_movement=(0.5, np.deg2rad(90)),
-                # TODO: max should be always according to the last step
                 # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
                 async_control=True,
@@ -284,7 +286,7 @@ def main():
             env_rel.get_wrapper_attr("sim").open_gui()
 
         if not DEBUG:
-            env_rel = StorageWrapper(env_rel, path="/home/juelg/code/frankcsy/record_real_christmas", camera_set=camera_set)
+            env_rel = StorageWrapper(env_rel, path=STORAGE_PATH, camera_set=camera_set, gif=PRODUCE_GIF)
             # ip_secondary = "192.168.102.1"
             # with Desk.fci(ip_secondary, user, pw):
             #     f = rcsss.hw.FR3(ip_secondary)
@@ -300,8 +302,7 @@ def main():
         with env_rel:
             with UDPViveActionServer(VIVE_HOST, VIVE_PORT, env_rel) as action_server:
                 if not DEBUG:
-                    input_loop(env_rel, action_server, None)
-
+                    input_loop(env_rel, action_server, camera_set)
                 else:
                     action_server.environment_step_loop()
 
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 4b357d4a..f1c1feb0 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -61,6 +61,10 @@ def __init__(
         # get git diff
         os.system(f'git diff --submodule=diff > {os.path.join(str(self.path), "git_diff.txt")}')
 
+        self.gif_path = Path(path) / "gifs"
+        if self.gif:
+            self.gif_path.mkdir(parents=True, exist_ok=True)
+
     def flush(self):
         """writes data to disk"""
         if len(self.data) == 0 or self.step_count == 0:
@@ -85,7 +89,7 @@ def flush(self):
                         previous_timestamp = self.data["timestamp"][idx]
                         imgs.append(Image.fromarray(img))
                     imgs[0].save(
-                        self.path / self.GIF.format(self.timestamp, self.episode_count, key),
+                        self.gif_path / self.GIF.format(self.timestamp, self.episode_count, key),
                         save_all=True,
                         append_images=imgs[1:],
                         duration=self.GIF_DURATION_S * 1000,
@@ -116,8 +120,6 @@ def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = Non
         self.flush()
         self.step_count = 0
         re = super().reset(seed=seed, options=options)
-        if self.camera_set is not None:
-            self.camera_set.record_video(self.path, self.episode_count)
         return re
 
     def close(self):

From 3a22845ab6552b7cad3c19d3dec08e08350b4a18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 27 Mar 2025 10:03:28 +0100
Subject: [PATCH 25/56] fix: remove slow unnecessary flatten

---
 pyproject.toml                | 1 -
 python/rcsss/envs/wrappers.py | 9 +++------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e0eba297..b41104b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,6 @@ dependencies = ["websockets>=11.0",
     "mujoco==3.2.6",
     # NOTE: Same for pinocchio (pin)
     "pin==2.7.0",
-    "flatten-dict==0.4.2",
 ]
 readme = "README.md"
 maintainers = [
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index f1c1feb0..635b1826 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -10,7 +10,6 @@
 
 import gymnasium as gym
 import numpy as np
-from flatten_dict import flatten
 from PIL import Image
 from rcsss.camera.hw import BaseHardwareCameraSet
 
@@ -103,12 +102,10 @@ def flush(self):
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
         # write obs and action into data
-        act_obs = {"action": action, "observation": obs}
-        flattened_act_obs = flatten(copy.deepcopy(act_obs), reducer="dot")
-        # add timestamp
-        flattened_act_obs["timestamp"] = datetime.now().timestamp()
+        act_obs = {"action": action, "observation": obs, "timestamp": datetime.now().timestamp()}
+        act_obs["timestamp"] = datetime.now().timestamp()
         self.data["language_instruction"] = self.language_instruction
-        for key, value in flattened_act_obs.items():
+        for key, value in act_obs.items():
             if key not in self.data:
                 self.data[key] = np.expand_dims(value, axis=0)
             else:

From 6eebe36e860b8af10a227eb5635465d8be6a89cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 27 Mar 2025 11:30:12 +0100
Subject: [PATCH 26/56] feat(hw gripper): async control for hardware gripper

---
 python/rcsss/_core/hw/__init__.pyi |  1 +
 python/rcsss/envs/factories.py     |  3 +-
 src/hw/FrankaHand.cpp              | 54 +++++++++++++++++++++---------
 src/hw/FrankaHand.h                |  4 +++
 src/pybind/rcsss.cpp               | 10 ++++--
 5 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index f38dc9f9..f0661d0d 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -26,6 +26,7 @@ __all__ = [
 ]
 
 class FHConfig(rcsss._core.common.GConfig):
+    async_control: bool
     epsilon_inner: float
     epsilon_outer: float
     force: float
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 08e4e459..93db3407 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -42,11 +42,12 @@ def default_fr3_hw_robot_cfg():
     return robot_cfg
 
 
-def default_fr3_hw_gripper_cfg():
+def default_fr3_hw_gripper_cfg(async_control: bool = False):
     gripper_cfg = rcsss.hw.FHConfig()
     gripper_cfg.epsilon_inner = gripper_cfg.epsilon_outer = 0.1
     gripper_cfg.speed = 0.1
     gripper_cfg.force = 30
+    gripper_cfg.async_control = async_control
     return gripper_cfg
 
 
diff --git a/src/hw/FrankaHand.cpp b/src/hw/FrankaHand.cpp
index 7ec33e85..8e8f1002 100644
--- a/src/hw/FrankaHand.cpp
+++ b/src/hw/FrankaHand.cpp
@@ -19,7 +19,7 @@ FrankaHand::FrankaHand(const std::string &ip, const FHConfig &cfg)
 FrankaHand::~FrankaHand() {}
 
 bool FrankaHand::set_parameters(const FHConfig &cfg) {
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   if (gripper_state.max_width < cfg.grasping_width) {
     return false;
   }
@@ -35,7 +35,7 @@ FHConfig *FrankaHand::get_parameters() {
 }
 
 FHState *FrankaHand::get_state() {
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   if (std::abs(gripper_state.max_width - this->cfg.grasping_width) > 0.01) {
     this->max_width = gripper_state.max_width - 0.001;
   }
@@ -53,54 +53,78 @@ void FrankaHand::set_normalized_width(double width, double force) {
     throw std::invalid_argument(
         "width must be between 0 and 1, force must be positive");
   }
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   width = width * gripper_state.max_width;
   this->last_commanded_width = width;
   if (force < 0.01) {
-    gripper.move(width, this->cfg.speed);
+    this->gripper.move(width, this->cfg.speed);
   } else {
-    gripper.grasp(width, this->cfg.speed, force, this->cfg.epsilon_inner,
+    this->gripper.grasp(width, this->cfg.speed, force, this->cfg.epsilon_inner,
                   this->cfg.epsilon_outer);
   }
 }
 double FrankaHand::get_normalized_width() {
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   return gripper_state.width / gripper_state.max_width;
 }
 
-void FrankaHand::m_reset() {
+void FrankaHand::m_stop(){
   this->gripper.stop();
+  if (this->control_thread.has_value()) {
+    this->control_thread->join();
+    this->control_thread.reset();
+  }
+
+}
+
+void FrankaHand::m_reset() {
+  this->m_stop();
   // open gripper
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   this->max_width = gripper_state.max_width - 0.001;
   this->last_commanded_width = this->max_width;
-  gripper.move(this->max_width, this->cfg.speed);
+  this->gripper.move(this->max_width, this->cfg.speed);
 }
 void FrankaHand::reset() { this->m_reset(); }
 
 bool FrankaHand::is_grasped() {
-  franka::GripperState gripper_state = gripper.readOnce();
+  franka::GripperState gripper_state = this->gripper.readOnce();
   return gripper_state.is_grasped;
 }
 
 bool FrankaHand::homing() {
   // Do a homing in order to estimate the maximum
   // grasping width with the current fingers.
-  return gripper.homing();
+  return this->gripper.homing();
 }
 
 void FrankaHand::grasp() {
-  this->last_commanded_width = this->max_width;
-  gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+  this->last_commanded_width = 0;
+  if (!this->cfg.async_control) {
+    this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+    return;
+  }
+  this->m_stop();
+  this->control_thread = std::thread([&](){this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);});
 }
 
 void FrankaHand::open() {
   this->last_commanded_width = this->max_width;
-  gripper.move(this->max_width, this->cfg.speed);
+  if (!this->cfg.async_control) {
+    this->gripper.move(this->max_width, this->cfg.speed);
+    return;
+  }
+  this->m_stop();
+  this->control_thread = std::thread([&](){this->gripper.move(this->max_width, this->cfg.speed);});
 }
 void FrankaHand::shut() {
   this->last_commanded_width = 0;
-  gripper.move(0, this->cfg.speed);
+  if (!this->cfg.async_control) {
+    this->gripper.move(0, this->cfg.speed);
+    return;
+  }
+  this->m_stop();
+  this->control_thread = std::thread([&](){this->gripper.move(0, this->cfg.speed);});
 }
 }  // namespace hw
 }  // namespace rcs
\ No newline at end of file
diff --git a/src/hw/FrankaHand.h b/src/hw/FrankaHand.h
index 90bfc448..998ccc4b 100644
--- a/src/hw/FrankaHand.h
+++ b/src/hw/FrankaHand.h
@@ -7,6 +7,7 @@
 #include <Eigen/Core>
 #include <cmath>
 #include <string>
+#include <thread>
 
 // TODO: we need a common interface for the gripper, maybe we do use the hal
 // we need to create a robot class that has both the robot and the gripper
@@ -24,6 +25,7 @@ struct FHConfig : common::GConfig {
   double force = 5;
   double epsilon_inner = 0.005;
   double epsilon_outer = 0.005;
+  bool async_control = false;
 };
 
 struct FHState : common::GState {
@@ -40,7 +42,9 @@ class FrankaHand : public common::Gripper {
   FHConfig cfg;
   double max_width;
   double last_commanded_width;
+  std::optional<std::thread> control_thread = std::nullopt;
   void m_reset();
+  void m_stop();
 
  public:
   FrankaHand(const std::string &ip, const FHConfig &cfg);
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 73ac7a6b..1664dfce 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -337,7 +337,8 @@ PYBIND11_MODULE(_core, m) {
       .def_readwrite("speed", &rcs::hw::FHConfig::speed)
       .def_readwrite("force", &rcs::hw::FHConfig::force)
       .def_readwrite("epsilon_inner", &rcs::hw::FHConfig::epsilon_inner)
-      .def_readwrite("epsilon_outer", &rcs::hw::FHConfig::epsilon_outer);
+      .def_readwrite("epsilon_outer", &rcs::hw::FHConfig::epsilon_outer)
+      .def_readwrite("async_control", &rcs::hw::FHConfig::async_control);
 
   py::class_<rcs::hw::FHState, rcs::common::GState>(hw, "FHState")
       .def(py::init<>())
@@ -362,8 +363,11 @@ PYBIND11_MODULE(_core, m) {
       .def("set_guiding_mode", &rcs::hw::FR3::set_guiding_mode,
            py::arg("enabled"))
       .def("zero_torque", &rcs::hw::FR3::zero_torque)
-      .def("osc_set_cartesian_position", &rcs::hw::FR3::osc_set_cartesian_position, py::arg("desired_pos_EE_in_base_frame"))
-      .def("controller_set_joint_position", &rcs::hw::FR3::controller_set_joint_position, py::arg("desired_q"))
+      .def("osc_set_cartesian_position",
+           &rcs::hw::FR3::osc_set_cartesian_position,
+           py::arg("desired_pos_EE_in_base_frame"))
+      .def("controller_set_joint_position",
+           &rcs::hw::FR3::controller_set_joint_position, py::arg("desired_q"))
       .def("stop_control_thread", &rcs::hw::FR3::stop_control_thread)
       .def("automatic_error_recovery", &rcs::hw::FR3::automatic_error_recovery)
       .def("double_tap_robot_to_continue",

From 074c50dc834ac7de34a509e66c9fcf395e1f5666 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 27 Mar 2025 15:49:00 +0100
Subject: [PATCH 27/56] refactor(hw control): async control in config

---
 python/rcsss/_core/hw/__init__.pyi |  1 +
 python/rcsss/control/vive.py       |  5 +-
 python/rcsss/envs/base.py          | 31 ++++--------
 python/rcsss/envs/factories.py     |  6 +--
 src/hw/FR3.cpp                     | 76 ++++++++++++++++--------------
 src/hw/FR3.h                       | 15 +++---
 src/pybind/rcsss.cpp               |  3 +-
 7 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index f0661d0d..87eeeaf4 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -68,6 +68,7 @@ class FR3(rcsss._core.common.Robot):
     def zero_torque(self) -> None: ...
 
 class FR3Config(rcsss._core.common.RConfig):
+    async_control: bool
     controller: IKController
     guiding_mode_enabled: bool
     load_parameters: FR3Load | None
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index ee79caad..8ac0dce7 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -261,15 +261,14 @@ def main():
                 ip=ROBOT_IP,
                 camera_set = camera_set,
                 # collision_guard="lab",
-                robot_cfg=default_fr3_hw_robot_cfg(),
+                robot_cfg=default_fr3_hw_robot_cfg(async_control=True),
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,
-                gripper_cfg=default_fr3_hw_gripper_cfg(),
+                gripper_cfg=default_fr3_hw_gripper_cfg(async_control=True),
                 max_relative_movement=(0.01, np.deg2rad(5)),
                 # max_relative_movement=(0.5, np.deg2rad(90)),
                 # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
-                async_control=True,
             )
         else:
             env_rel = fr3_sim_env(
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index e70da752..f398589a 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -160,7 +160,7 @@ class FR3Env(gym.Env):
     y
     """
 
-    def __init__(self, robot: hw.FR3, control_mode: ControlMode, async_control: bool = False):
+    def __init__(self, robot: hw.FR3, control_mode: ControlMode):
         self.robot = robot
         self._control_mode_overrides = [control_mode]
         self.action_space: gym.spaces.Dict
@@ -179,7 +179,6 @@ def __init__(self, robot: hw.FR3, control_mode: ControlMode, async_control: bool
         self.trpy_key = get_space_keys(TRPYDictType)[0]
         self.tquart_key = get_space_keys(TQuartDictType)[0]
         self.prev_action: dict | None = None
-        self.async_control = async_control
 
     def get_unwrapped_control_mode(self, idx: int) -> ControlMode:
         """Returns the unwrapped control mode at a certain index. 0 is the base control mode, -1 the last."""
@@ -224,35 +223,21 @@ def step(self, action: CartOrJointContType) -> tuple[ArmObsType, float, bool, bo
             self.prev_action is None
             or not np.allclose(action_dict[self.joints_key], self.prev_action[self.joints_key], atol=1e-03, rtol=0)
         ):
-            # cast is needed because typed dicts cannot be checked at runtime
-            if not self.async_control:
-                self.robot.set_joint_position(action_dict[self.joints_key])
-            else:
-                self.robot.controller_set_joint_position(action_dict[self.joints_key])
+            self.robot.set_joint_position(action_dict[self.joints_key])
         elif self.get_base_control_mode() == ControlMode.CARTESIAN_TRPY and (
             self.prev_action is None
             or not np.allclose(action_dict[self.trpy_key], self.prev_action[self.trpy_key], atol=1e-03, rtol=0)
         ):
-            if not self.async_control:
-                self.robot.set_cartesian_position(
-                    common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
-                )
-            else:
-                self.robot.osc_set_cartesian_position(
-                    common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
-                )
+            self.robot.set_cartesian_position(
+                common.Pose(translation=action_dict[self.trpy_key][:3], rpy_vector=action_dict[self.trpy_key][3:])
+            )
         elif self.get_base_control_mode() == ControlMode.CARTESIAN_TQuart and (
             self.prev_action is None
             or not np.allclose(action_dict[self.tquart_key], self.prev_action[self.tquart_key], atol=1e-03, rtol=0)
         ):
-            if not self.async_control:
-                self.robot.set_cartesian_position(
-                    common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
-                )
-            else:
-                self.robot.osc_set_cartesian_position(
-                    common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
-                )
+            self.robot.set_cartesian_position(
+                common.Pose(translation=action_dict[self.tquart_key][:3], quaternion=action_dict[self.tquart_key][3:])
+            )
         self.prev_action = copy.deepcopy(action_dict)
         return self.get_obs(), 0, False, False, {}
 
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 93db3407..e19cc9e1 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -34,11 +34,12 @@
 logger.setLevel(logging.INFO)
 
 
-def default_fr3_hw_robot_cfg():
+def default_fr3_hw_robot_cfg(async_control: bool = False):
     robot_cfg = FR3Config()
     robot_cfg.tcp_offset = rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset())
     robot_cfg.speed_factor = 0.1
     robot_cfg.controller = IKController.robotics_library
+    robot_cfg.async_control = async_control
     return robot_cfg
 
 
@@ -90,7 +91,6 @@ def fr3_hw_env(
     max_relative_movement: float | tuple[float, float] | None = None,
     relative_to: RelativeTo = RelativeTo.LAST_STEP,
     urdf_path: str | PathLike | None = None,
-    async_control: bool = False,
 ) -> gym.Env:
     """
     Creates a hardware environment for the FR3 robot.
@@ -119,7 +119,7 @@ def fr3_hw_env(
     robot = rcsss.hw.FR3(ip, ik)
     robot.set_parameters(robot_cfg)
 
-    env: gym.Env = FR3Env(robot, ControlMode.JOINTS if collision_guard is not None else control_mode, async_control=async_control)
+    env: gym.Env = FR3Env(robot, ControlMode.JOINTS if collision_guard is not None else control_mode)
 
     env = FR3HW(env)
     if gripper_cfg is not None:
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index c038ab1b..45245685 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -87,7 +87,7 @@ void FR3::set_default_robot_behavior() {
 
 common::Pose FR3::get_cartesian_position() {
   common::Pose x;
-  if (!this->control_thread_running) {
+  if (this->running_controller == Controller::none){
     this->curr_state = this->robot.readOnce();
     x = common::Pose(this->curr_state.O_T_EE);
   } else {
@@ -99,14 +99,18 @@ common::Pose FR3::get_cartesian_position() {
 }
 
 void FR3::set_joint_position(const common::Vector7d &q) {
-  // TODO: max force?
+  if (this->cfg.async_control){
+    this->controller_set_joint_position(q);
+    return;
+  }
+  // sync control
   MotionGenerator motion_generator(this->cfg.speed_factor, q);
   this->robot.control(motion_generator);
 }
 
 common::Vector7d FR3::get_joint_position() {
   common::Vector7d joints;
-  if (!this->control_thread_running) {
+  if (this->running_controller == Controller::none) {
     this->curr_state = this->robot.readOnce();
     joints = common::Vector7d(this->curr_state.q.data());
   } else {
@@ -154,39 +158,38 @@ void TorqueSafetyGuardFn(std::array<double, 7> &tau_d_array, double min_torque,
   }
 }
 
-
-void FR3::controller_set_joint_position(
-    const common::Vector7d &desired_q) {
-
+void FR3::controller_set_joint_position(const common::Vector7d &desired_q) {
   // from deoxys/config/osc-position-controller.yml
   double traj_interpolation_time_fraction = 1.0; // in s
   // form deoxys/config/charmander.yml
   int policy_rate = 20;
   int traj_rate = 500;
 
-  if (!this->control_thread_running) {
+  if (this->running_controller == Controller::none) {
     this->controller_time = 0.0;
     this->get_joint_position();
+  } else if (this->running_controller != Controller::jsc) {
+    // runtime error
+    throw std::runtime_error("Controller type must but joint space but is " +
+                             std::to_string(this->running_controller) + ". To change controller type stop the current controller first.");
   } else {
     this->interpolator_mutex.lock();
   }
 
   this->joint_interpolator.Reset(
-      this->controller_time, Eigen::Map<common::Vector7d>(this->curr_state.q.data()), desired_q, policy_rate, traj_rate,
-      traj_interpolation_time_fraction);
+      this->controller_time,
+      Eigen::Map<common::Vector7d>(this->curr_state.q.data()), desired_q,
+      policy_rate, traj_rate, traj_interpolation_time_fraction);
 
   // if not thread is running, then start
-  if (!this->control_thread_running) {
-    this->control_thread_running = true;
+  if (this->running_controller == Controller::none) {
+    this->running_controller= Controller::jsc;
     this->control_thread = std::thread(&FR3::joint_controller, this);
   } else {
     this->interpolator_mutex.unlock();
   }
 }
 
-// todos
-// - controller type
-// - joint type
 void FR3::osc_set_cartesian_position(
     const common::Pose &desired_pose_EE_in_base_frame) {
   // from deoxys/config/osc-position-controller.yml
@@ -195,37 +198,38 @@ void FR3::osc_set_cartesian_position(
   int policy_rate = 20;
   int traj_rate = 500;
 
-  if (!this->control_thread_running) {
+  if (this->running_controller == Controller::none) {
     this->controller_time = 0.0;
     this->get_cartesian_position();
+  } else if (this->running_controller != Controller::osc) {
+    throw std::runtime_error("Controller type must but osc but is " +
+                             std::to_string(this->running_controller) + ". To change controller type stop the current controller first.");
   } else {
     this->interpolator_mutex.lock();
   }
 
   common::Pose curr_pose(this->curr_state.O_T_EE);
   this->traj_interpolator.Reset(
-      this->controller_time, curr_pose.translation(),
-      curr_pose.quaternion(), desired_pose_EE_in_base_frame.translation(),
+      this->controller_time, curr_pose.translation(), curr_pose.quaternion(),
+      desired_pose_EE_in_base_frame.translation(),
       desired_pose_EE_in_base_frame.quaternion(), policy_rate, traj_rate,
       traj_interpolation_time_fraction);
 
   // if not thread is running, then start
-  if (!this->control_thread_running) {
-    this->control_thread_running = true;
+  if (this->running_controller == Controller::none) {
+    this->running_controller= Controller::osc;
     this->control_thread = std::thread(&FR3::osc, this);
   } else {
     this->interpolator_mutex.unlock();
   }
 }
 
-
 // method to stop thread
 void FR3::stop_control_thread() {
-  if (this->control_thread.has_value() && this->control_thread_running) {
-    this->control_thread_running = false;
+  if (this->control_thread.has_value() && this->running_controller != Controller::none) {
+    this->running_controller = Controller::none;
     this->control_thread->join();
     this->control_thread.reset();
-    // this->osc_desired_pos_EE_in_base_frame.reset();
   }
 }
 
@@ -280,8 +284,7 @@ void FR3::osc() {
         std::chrono::high_resolution_clock::now();
 
     // torques handler
-    if (!this->control_thread_running) {
-      // TODO: test if this also works when the robot is moving
+    if (this->running_controller == Controller::none) {
       franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
       return franka::MotionFinished(zero_torques);
     }
@@ -454,19 +457,16 @@ void FR3::joint_controller() {
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
 
-
   // deoxys/config/joint-impedance-controller.yml
   common::Vector7d Kp;
   Kp << 100., 100., 100., 100., 75., 150., 50.;
 
   common::Vector7d Kd;
   Kd << 20., 20., 20., 20., 7.5, 15.0, 5.0;
- 
 
   Eigen::Array<double, 7, 1> joint_max_;
   Eigen::Array<double, 7, 1> joint_min_;
 
-
   joint_max_ << 2.8978, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973;
   joint_min_ << -2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973;
 
@@ -475,29 +475,25 @@ void FR3::joint_controller() {
     std::chrono::high_resolution_clock::time_point t1 =
         std::chrono::high_resolution_clock::now();
 
-
     // torques handler
-    if (!this->control_thread_running) {
+    if (this->running_controller == Controller::none) {
       // TODO: test if this also works when the robot is moving
       franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
       return franka::MotionFinished(zero_torques);
     }
 
-
     common::Vector7d desired_q;
     common::Pose pose(robot_state.O_T_EE);
 
     this->interpolator_mutex.lock();
     this->curr_state = robot_state;
     this->controller_time += period.toSec();
-    this->joint_interpolator.GetNextStep(this->controller_time,
-                                        desired_q);
+    this->joint_interpolator.GetNextStep(this->controller_time, desired_q);
     this->interpolator_mutex.unlock();
     // end torques handler
 
     Eigen::Matrix<double, 7, 1> tau_d;
 
-
     // Current joint velocity
     Eigen::Map<const Eigen::Matrix<double, 7, 1>> dq(robot_state.dq.data());
 
@@ -506,7 +502,6 @@ void FR3::joint_controller() {
 
     Eigen::MatrixXd joint_pos_error(7, 1);
 
-
     joint_pos_error << desired_q - q;
 
     tau_d << Kp.cwiseProduct(joint_pos_error) - Kd.cwiseProduct(dq);
@@ -634,6 +629,11 @@ std::optional<std::shared_ptr<common::IK>> FR3::get_ik() { return this->m_ik; }
 
 void FR3::set_cartesian_position(const common::Pose &x) {
   // pose is assumed to be in the robots coordinate frame
+  if (this->cfg.async_control){
+    this->osc_set_cartesian_position(x);
+    return;
+  }
+  // TODO: this should handled with tcp offset config
   common::Pose nominal_end_effector_frame_value;
   if (this->cfg.nominal_end_effector_frame.has_value()) {
     nominal_end_effector_frame_value =
@@ -641,6 +641,10 @@ void FR3::set_cartesian_position(const common::Pose &x) {
   } else {
     nominal_end_effector_frame_value = common::Pose::Identity();
   }
+  // nominal end effector frame should be on top of tcp offset as franka already takes care of
+  // the default franka hand offset
+  // lets add a franka hand offset
+
 
   if (this->cfg.controller == IKController::internal) {
     // if gripper is attached the tcp offset will automatically be applied
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 5f1432a6..57a68931 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -2,18 +2,18 @@
 #define RCS_FR3_H
 
 #include <common/IK.h>
+#include <common/LinearPoseTrajInterpolator.h>
 #include <common/Pose.h>
 #include <common/Robot.h>
 #include <common/utils.h>
-#include <common/LinearPoseTrajInterpolator.h>
 #include <franka/robot.h>
 
 #include <cmath>
 #include <memory>
+#include <mutex>
 #include <optional>
 #include <string>
 #include <thread>
-#include <mutex>
 
 namespace rcs {
 namespace hw {
@@ -29,6 +29,7 @@ struct FR3Load {
   std::optional<Eigen::Matrix3d> load_inertia;
 };
 enum IKController { internal = 0, robotics_library };
+enum Controller { none = 0, jsc, osc };
 struct FR3Config : common::RConfig {
   // TODO: max force and elbow?
   // TODO: we can either write specific bindings for each, or we use python
@@ -40,6 +41,7 @@ struct FR3Config : common::RConfig {
   std::optional<common::Pose> nominal_end_effector_frame = std::nullopt;
   std::optional<common::Pose> world_to_robot = std::nullopt;
   common::Pose tcp_offset = common::Pose::Identity();
+  bool async_control = false;
 };
 
 struct FR3State : common::RState {};
@@ -52,14 +54,10 @@ class FR3 : public common::Robot {
   std::optional<std::thread> control_thread = std::nullopt;
   common::LinearPoseTrajInterpolator traj_interpolator;
   double controller_time = 0.0;
-
-
   common::LinearJointPositionTrajInterpolator joint_interpolator;
-
   franka::RobotState curr_state;
-
-  bool control_thread_running = false;
   std::mutex interpolator_mutex;
+  Controller running_controller = Controller::none;
 
  public:
   FR3(const std::string &ip,
@@ -84,7 +82,8 @@ class FR3 : public common::Robot {
   void set_guiding_mode(bool enabled);
 
   void controller_set_joint_position(const common::Vector7d &desired_q);
-  void osc_set_cartesian_position(const common::Pose &desired_pose_EE_in_base_frame);
+  void osc_set_cartesian_position(
+      const common::Pose &desired_pose_EE_in_base_frame);
 
   void stop_control_thread();
 
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 1664dfce..2b9e8de9 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -329,7 +329,8 @@ PYBIND11_MODULE(_core, m) {
       .def_readwrite("nominal_end_effector_frame",
                      &rcs::hw::FR3Config::nominal_end_effector_frame)
       .def_readwrite("world_to_robot", &rcs::hw::FR3Config::world_to_robot)
-      .def_readwrite("tcp_offset", &rcs::hw::FR3Config::tcp_offset);
+      .def_readwrite("tcp_offset", &rcs::hw::FR3Config::tcp_offset)
+      .def_readwrite("async_control", &rcs::hw::FR3Config::async_control);
 
   py::class_<rcs::hw::FHConfig, rcs::common::GConfig>(hw, "FHConfig")
       .def(py::init<>())

From dd48836858c8979ebffe9f0bc9aeb0858283e020 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 27 Mar 2025 16:11:26 +0100
Subject: [PATCH 28/56] chore: renamed ikcontroller to ik_solver

---
 python/examples/fr3.py             |  4 ++--
 python/rcsss/_core/hw/__init__.pyi | 26 +++++++++++++-------------
 python/rcsss/control/fr3_desk.py   |  2 +-
 python/rcsss/envs/factories.py     |  4 ++--
 src/hw/FR3.cpp                     |  4 ++--
 src/hw/FR3.h                       |  4 ++--
 src/pybind/rcsss.cpp               |  8 ++++----
 7 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/python/examples/fr3.py b/python/examples/fr3.py
index 5bf3b325..80861f85 100644
--- a/python/examples/fr3.py
+++ b/python/examples/fr3.py
@@ -4,7 +4,7 @@
 import numpy as np
 import rcsss
 from rcsss import sim
-from rcsss._core.hw import FR3Config, IKController
+from rcsss._core.hw import FR3Config, IKSolver
 from rcsss._core.sim import CameraType
 from rcsss.camera.sim import SimCameraConfig, SimCameraSet, SimCameraSetConfig
 from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
@@ -95,7 +95,7 @@ def main():
             robot = rcsss.hw.FR3(ROBOT_IP, ik)
             robot_cfg = FR3Config()
             robot_cfg.tcp_offset = rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset())
-            robot_cfg.controller = IKController.robotics_library
+            robot_cfg.ik_solver = IKSolver.rcs
             robot.set_parameters(robot_cfg)
 
             gripper_cfg_hw = rcsss.hw.FHConfig()
diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index 87eeeaf4..a7caf865 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -19,10 +19,10 @@ __all__ = [
     "FR3Load",
     "FR3State",
     "FrankaHand",
-    "IKController",
+    "IKSolver",
     "exceptions",
-    "internal",
-    "robotics_library",
+    "franka",
+    "rcs",
 ]
 
 class FHConfig(rcsss._core.common.GConfig):
@@ -69,8 +69,8 @@ class FR3(rcsss._core.common.Robot):
 
 class FR3Config(rcsss._core.common.RConfig):
     async_control: bool
-    controller: IKController
     guiding_mode_enabled: bool
+    ik_solver: IKSolver
     load_parameters: FR3Load | None
     nominal_end_effector_frame: rcsss._core.common.Pose | None
     speed_factor: float
@@ -95,20 +95,20 @@ class FrankaHand(rcsss._core.common.Gripper):
     def is_grasped(self) -> bool: ...
     def set_parameters(self, cfg: FHConfig) -> bool: ...
 
-class IKController:
+class IKSolver:
     """
     Members:
 
-      internal
+      franka
 
-      robotics_library
+      rcs
     """
 
     __members__: typing.ClassVar[
-        dict[str, IKController]
-    ]  # value = {'internal': <IKController.internal: 0>, 'robotics_library': <IKController.robotics_library: 1>}
-    internal: typing.ClassVar[IKController]  # value = <IKController.internal: 0>
-    robotics_library: typing.ClassVar[IKController]  # value = <IKController.robotics_library: 1>
+        dict[str, IKSolver]
+    ]  # value = {'franka': <IKSolver.franka: 0>, 'rcs': <IKSolver.rcs: 1>}
+    franka: typing.ClassVar[IKSolver]  # value = <IKSolver.franka: 0>
+    rcs: typing.ClassVar[IKSolver]  # value = <IKSolver.rcs: 1>
     def __eq__(self, other: typing.Any) -> bool: ...
     def __getstate__(self) -> int: ...
     def __hash__(self) -> int: ...
@@ -124,5 +124,5 @@ class IKController:
     @property
     def value(self) -> int: ...
 
-internal: IKController  # value = <IKController.internal: 0>
-robotics_library: IKController  # value = <IKController.robotics_library: 1>
+franka: IKSolver  # value = <IKSolver.franka: 0>
+rcs: IKSolver  # value = <IKSolver.rcs: 1>
diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index d20aa73a..036bc5cc 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -31,7 +31,7 @@ def home(ip: str, username: str, password: str, shut: bool, unlock: bool = False
         f = rcsss.hw.FR3(ip)
         config = rcsss.hw.FR3Config()
         config.speed_factor = 0.7
-        config.controller = rcsss.hw.IKController.internal
+        config.ik_solver = rcsss.hw.IKSolver.franka
         config.guiding_mode_enabled = True
         f.set_parameters(config)
         config_hand = rcsss.hw.FHConfig()
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index e19cc9e1..faa12efe 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -7,7 +7,7 @@
 import rcsss
 from gymnasium.envs.registration import EnvCreator
 from rcsss import sim
-from rcsss._core.hw import FR3Config, IKController
+from rcsss._core.hw import FR3Config, IKSolver
 from rcsss._core.sim import CameraType
 from rcsss.camera.hw import BaseHardwareCameraSet
 from rcsss.camera.interface import BaseCameraConfig
@@ -38,7 +38,7 @@ def default_fr3_hw_robot_cfg(async_control: bool = False):
     robot_cfg = FR3Config()
     robot_cfg.tcp_offset = rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset())
     robot_cfg.speed_factor = 0.1
-    robot_cfg.controller = IKController.robotics_library
+    robot_cfg.ik_solver = IKSolver.rcs
     robot_cfg.async_control = async_control
     return robot_cfg
 
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index 45245685..37303f0a 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -646,13 +646,13 @@ void FR3::set_cartesian_position(const common::Pose &x) {
   // lets add a franka hand offset
 
 
-  if (this->cfg.controller == IKController::internal) {
+  if (this->cfg.ik_solver == IKSolver::franka) {
     // if gripper is attached the tcp offset will automatically be applied
     // by libfranka
     this->robot.setEE(nominal_end_effector_frame_value.affine_array());
     this->set_cartesian_position_internal(x, 1.0, std::nullopt, std::nullopt);
 
-  } else if (this->cfg.controller == IKController::robotics_library) {
+  } else if (this->cfg.ik_solver == IKSolver::rcs) {
     this->set_cartesian_position_ik(x);
   }
 }
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 57a68931..30160ea9 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -28,13 +28,13 @@ struct FR3Load {
   std::optional<Eigen::Vector3d> f_x_cload;
   std::optional<Eigen::Matrix3d> load_inertia;
 };
-enum IKController { internal = 0, robotics_library };
+enum IKSolver { franka = 0, rcs };
 enum Controller { none = 0, jsc, osc };
 struct FR3Config : common::RConfig {
   // TODO: max force and elbow?
   // TODO: we can either write specific bindings for each, or we use python
   // dictionaries with these objects
-  IKController controller = IKController::internal;
+  IKSolver ik_solver = IKSolver::franka;
   bool guiding_mode_enabled = true;
   double speed_factor = DEFAULT_SPEED_FACTOR;
   std::optional<FR3Load> load_parameters = std::nullopt;
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 2b9e8de9..8a4ba477 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -314,14 +314,14 @@ PYBIND11_MODULE(_core, m) {
       .def_readwrite("f_x_cload", &rcs::hw::FR3Load::f_x_cload)
       .def_readwrite("load_inertia", &rcs::hw::FR3Load::load_inertia);
 
-  py::enum_<rcs::hw::IKController>(hw, "IKController")
-      .value("internal", rcs::hw::IKController::internal)
-      .value("robotics_library", rcs::hw::IKController::robotics_library)
+  py::enum_<rcs::hw::IKSolver>(hw, "IKSolver")
+      .value("franka", rcs::hw::IKSolver::franka)
+      .value("rcs", rcs::hw::IKSolver::rcs)
       .export_values();
 
   py::class_<rcs::hw::FR3Config, rcs::common::RConfig>(hw, "FR3Config")
       .def(py::init<>())
-      .def_readwrite("controller", &rcs::hw::FR3Config::controller)
+      .def_readwrite("ik_solver", &rcs::hw::FR3Config::ik_solver)
       .def_readwrite("speed_factor", &rcs::hw::FR3Config::speed_factor)
       .def_readwrite("guiding_mode_enabled",
                      &rcs::hw::FR3Config::guiding_mode_enabled)

From cdad0870a04db4b5dbe11636b53c28367ac2cc79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 28 Mar 2025 09:10:22 +0100
Subject: [PATCH 29/56] refactor(hw): guiding mode configure DOFs

---
 python/rcsss/_core/hw/__init__.pyi | 12 ++++++++++--
 src/hw/FR3.cpp                     | 12 ++++++------
 src/hw/FR3.h                       |  5 +++--
 src/pybind/rcsss.cpp               |  4 +---
 4 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index a7caf865..9e2d3a1c 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -62,14 +62,22 @@ class FR3(rcsss._core.common.Robot):
     ) -> None: ...
     def set_cartesian_position_internal(self, pose: rcsss._core.common.Pose) -> None: ...
     def set_default_robot_behavior(self) -> None: ...
-    def set_guiding_mode(self, enabled: bool) -> None: ...
+    def set_guiding_mode(
+        self,
+        x: bool = True,
+        y: bool = True,
+        z: bool = True,
+        roll: bool = True,
+        pitch: bool = True,
+        yaw: bool = True,
+        elbow: bool = True,
+    ) -> None: ...
     def set_parameters(self, cfg: FR3Config) -> bool: ...
     def stop_control_thread(self) -> None: ...
     def zero_torque(self) -> None: ...
 
 class FR3Config(rcsss._core.common.RConfig):
     async_control: bool
-    guiding_mode_enabled: bool
     ik_solver: IKSolver
     load_parameters: FR3Load | None
     nominal_end_effector_frame: rcsss._core.common.Pose | None
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index 37303f0a..e5c4ca5f 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -24,7 +24,7 @@ FR3::FR3(const std::string &ip, std::optional<std::shared_ptr<common::IK>> ik,
     : robot(ip), m_ik(ik) {
   // set collision behavior and impedance
   this->set_default_robot_behavior();
-  this->set_guiding_mode(true);
+  this->set_guiding_mode(true, true, true, true, true, true, true);
 
   if (cfg.has_value()) {
     this->cfg = cfg.value();
@@ -121,11 +121,11 @@ common::Vector7d FR3::get_joint_position() {
   return joints;
 }
 
-void FR3::set_guiding_mode(bool enabled) {
-  std::array<bool, 6> activated;
-  activated.fill(enabled);
-  this->robot.setGuidingMode(activated, enabled);
-  this->cfg.guiding_mode_enabled = enabled;
+void FR3::set_guiding_mode(bool x, bool y, bool z,
+                           bool roll, bool pitch, bool yaw,
+                           bool elbow) {
+  std::array<bool, 6> activated = {x, y, z, roll, pitch, yaw};
+  this->robot.setGuidingMode(activated, elbow);
 }
 
 void PInverse(const Eigen::MatrixXd &M, Eigen::MatrixXd &M_inv,
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 30160ea9..f946ef70 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -35,7 +35,6 @@ struct FR3Config : common::RConfig {
   // TODO: we can either write specific bindings for each, or we use python
   // dictionaries with these objects
   IKSolver ik_solver = IKSolver::franka;
-  bool guiding_mode_enabled = true;
   double speed_factor = DEFAULT_SPEED_FACTOR;
   std::optional<FR3Load> load_parameters = std::nullopt;
   std::optional<common::Pose> nominal_end_effector_frame = std::nullopt;
@@ -79,7 +78,9 @@ class FR3 : public common::Robot {
 
   common::Vector7d get_joint_position() override;
 
-  void set_guiding_mode(bool enabled);
+  void set_guiding_mode(bool x, bool y, bool z,
+                           bool roll, bool pitch, bool yaw,
+                           bool elbow);
 
   void controller_set_joint_position(const common::Vector7d &desired_q);
   void osc_set_cartesian_position(
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 8a4ba477..4c3357f0 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -323,8 +323,6 @@ PYBIND11_MODULE(_core, m) {
       .def(py::init<>())
       .def_readwrite("ik_solver", &rcs::hw::FR3Config::ik_solver)
       .def_readwrite("speed_factor", &rcs::hw::FR3Config::speed_factor)
-      .def_readwrite("guiding_mode_enabled",
-                     &rcs::hw::FR3Config::guiding_mode_enabled)
       .def_readwrite("load_parameters", &rcs::hw::FR3Config::load_parameters)
       .def_readwrite("nominal_end_effector_frame",
                      &rcs::hw::FR3Config::nominal_end_effector_frame)
@@ -362,7 +360,7 @@ PYBIND11_MODULE(_core, m) {
       .def("set_default_robot_behavior",
            &rcs::hw::FR3::set_default_robot_behavior)
       .def("set_guiding_mode", &rcs::hw::FR3::set_guiding_mode,
-           py::arg("enabled"))
+           py::arg("x") = true, py::arg("y") = true, py::arg("z") = true, py::arg("roll") = true, py::arg("pitch") = true, py::arg("yaw") = true, py::arg("elbow") = true)
       .def("zero_torque", &rcs::hw::FR3::zero_torque)
       .def("osc_set_cartesian_position",
            &rcs::hw::FR3::osc_set_cartesian_position,

From 48602127e54f53a2f434b208fa535d46daf280f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 28 Mar 2025 09:33:04 +0100
Subject: [PATCH 30/56] refactor(hw): zero torque guiding integrated into
 control thread flow

---
 python/rcsss/_core/hw/__init__.pyi |  2 +-
 src/hw/FR3.cpp                     | 36 ++++++++++++++++++++++++------
 src/hw/FR3.h                       | 13 ++++++-----
 src/pybind/rcsss.cpp               |  2 +-
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index 9e2d3a1c..16df7b80 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -74,7 +74,7 @@ class FR3(rcsss._core.common.Robot):
     ) -> None: ...
     def set_parameters(self, cfg: FR3Config) -> bool: ...
     def stop_control_thread(self) -> None: ...
-    def zero_torque(self) -> None: ...
+    def zero_torque_guiding(self) -> None: ...
 
 class FR3Config(rcsss._core.common.RConfig):
     async_control: bool
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index e5c4ca5f..28be62e6 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -238,6 +238,10 @@ void FR3::osc() {
 
   this->controller_time = 0.0;
 
+  // conservative collision and impedance behavior
+  this->set_default_robot_behavior();
+
+  // high collision threshold values for high impedance
   // this->robot.setCollisionBehavior(
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
@@ -451,6 +455,11 @@ void FR3::osc() {
 void FR3::joint_controller() {
   franka::Model model = this->robot.loadModel();
   this->controller_time = 0.0;
+
+  // conservative collision and impedance behavior
+  this->set_default_robot_behavior();
+
+  // high collision threshold values for high impedance
   // this->robot.setCollisionBehavior(
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
   //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
@@ -537,20 +546,33 @@ void FR3::joint_controller() {
   });
 }
 
-void FR3::zero_torque() {
-  // start thread
+void FR3::zero_torque_guiding(){
+
+  if (this->running_controller != Controller::none) {
+    throw std::runtime_error("A controller is currently running. Please stop it first.");
+  }
+    this->controller_time = 0.0;
+    this->running_controller= Controller::ztc;
+    this->control_thread = std::thread(&FR3::zero_torque_controller, this);
+}
+
+void FR3::zero_torque_controller() {
 
+  // high collision threshold values for high impedance
   robot.setCollisionBehavior(
       {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
       {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
       {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
       {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
 
-  double time = 0.0;
-  this->robot.control([&](const franka::RobotState &state,
-                          franka::Duration time_step) -> franka::Torques {
-    time += time_step.toSec();
-    if (time > 30) {
+  this->controller_time = 0.0;
+  this->robot.control([&](const franka::RobotState &robot_state,
+                          franka::Duration period) -> franka::Torques {
+    this->interpolator_mutex.lock();
+    this->curr_state = robot_state;
+    this->controller_time += period.toSec();
+    this->interpolator_mutex.unlock();
+    if (this->running_controller == Controller::none) {
       // stop
       return franka::MotionFinished(franka::Torques({0, 0, 0, 0, 0, 0, 0}));
     }
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index f946ef70..4f3bc568 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -29,7 +29,9 @@ struct FR3Load {
   std::optional<Eigen::Matrix3d> load_inertia;
 };
 enum IKSolver { franka = 0, rcs };
-enum Controller { none = 0, jsc, osc };
+// modes: joint-space control, operational-space control, zero-torque
+// control
+enum Controller { none = 0, jsc, osc, ztc };
 struct FR3Config : common::RConfig {
   // TODO: max force and elbow?
   // TODO: we can either write specific bindings for each, or we use python
@@ -57,6 +59,9 @@ class FR3 : public common::Robot {
   franka::RobotState curr_state;
   std::mutex interpolator_mutex;
   Controller running_controller = Controller::none;
+  void osc();
+  void joint_controller();
+  void zero_torque_controller();
 
  public:
   FR3(const std::string &ip,
@@ -85,14 +90,10 @@ class FR3 : public common::Robot {
   void controller_set_joint_position(const common::Vector7d &desired_q);
   void osc_set_cartesian_position(
       const common::Pose &desired_pose_EE_in_base_frame);
+  void zero_torque_guiding();
 
   void stop_control_thread();
 
-  void osc();
-  void joint_controller();
-
-  void zero_torque();
-
   void move_home() override;
 
   void automatic_error_recovery();
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 4c3357f0..5554c6ae 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -361,7 +361,7 @@ PYBIND11_MODULE(_core, m) {
            &rcs::hw::FR3::set_default_robot_behavior)
       .def("set_guiding_mode", &rcs::hw::FR3::set_guiding_mode,
            py::arg("x") = true, py::arg("y") = true, py::arg("z") = true, py::arg("roll") = true, py::arg("pitch") = true, py::arg("yaw") = true, py::arg("elbow") = true)
-      .def("zero_torque", &rcs::hw::FR3::zero_torque)
+      .def("zero_torque_guiding", &rcs::hw::FR3::zero_torque_guiding)
       .def("osc_set_cartesian_position",
            &rcs::hw::FR3::osc_set_cartesian_position,
            py::arg("desired_pos_EE_in_base_frame"))

From bcfc8c00c10d0ecb16c9cc935416653b5e3764c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 28 Mar 2025 10:00:30 +0100
Subject: [PATCH 31/56] feat(hw gripper): bool state and is moving

---
 python/rcsss/_core/hw/__init__.pyi |  4 ++++
 src/hw/FrankaHand.cpp              | 35 +++++++++++++++++++++++++-----
 src/hw/FrankaHand.h                |  4 ++++
 src/pybind/rcsss.cpp               |  2 ++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/python/rcsss/_core/hw/__init__.pyi b/python/rcsss/_core/hw/__init__.pyi
index 16df7b80..e80e1eda 100644
--- a/python/rcsss/_core/hw/__init__.pyi
+++ b/python/rcsss/_core/hw/__init__.pyi
@@ -37,8 +37,12 @@ class FHConfig(rcsss._core.common.GConfig):
 class FHState(rcsss._core.common.GState):
     def __init__(self) -> None: ...
     @property
+    def bool_state(self) -> bool: ...
+    @property
     def is_grasped(self) -> bool: ...
     @property
+    def is_moving(self) -> bool: ...
+    @property
     def last_commanded_width(self) -> float: ...
     @property
     def max_unnormalized_width(self) -> float: ...
diff --git a/src/hw/FrankaHand.cpp b/src/hw/FrankaHand.cpp
index 8e8f1002..8d8fcd13 100644
--- a/src/hw/FrankaHand.cpp
+++ b/src/hw/FrankaHand.cpp
@@ -45,6 +45,8 @@ FHState *FrankaHand::get_state() {
   state->temperature = gripper_state.temperature;
   state->max_unnormalized_width = this->max_width;
   state->last_commanded_width = this->last_commanded_width;
+  state->bool_state = this->last_commanded_width > 0;
+  state->is_moving = this->is_moving;
   return state;
 }
 
@@ -74,7 +76,7 @@ void FrankaHand::m_stop(){
     this->control_thread->join();
     this->control_thread.reset();
   }
-
+  this->is_moving = false;
 }
 
 void FrankaHand::m_reset() {
@@ -83,7 +85,9 @@ void FrankaHand::m_reset() {
   franka::GripperState gripper_state = this->gripper.readOnce();
   this->max_width = gripper_state.max_width - 0.001;
   this->last_commanded_width = this->max_width;
+  this->is_moving = true;
   this->gripper.move(this->max_width, this->cfg.speed);
+  this->is_moving = false;
 }
 void FrankaHand::reset() { this->m_reset(); }
 
@@ -95,36 +99,57 @@ bool FrankaHand::is_grasped() {
 bool FrankaHand::homing() {
   // Do a homing in order to estimate the maximum
   // grasping width with the current fingers.
-  return this->gripper.homing();
+  this->is_moving = true;
+  bool success =  this->gripper.homing();
+  this->is_moving = false;
+  return success;
 }
 
 void FrankaHand::grasp() {
   this->last_commanded_width = 0;
   if (!this->cfg.async_control) {
+    this->is_moving = true;
     this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+    this->is_moving = false;
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);});
+  this->control_thread = std::thread([&](){
+    this->is_moving = true;
+    this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+    this->is_moving = false;
+  });
 }
 
 void FrankaHand::open() {
   this->last_commanded_width = this->max_width;
   if (!this->cfg.async_control) {
+    this->is_moving = true;
     this->gripper.move(this->max_width, this->cfg.speed);
+    this->is_moving = false;
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){this->gripper.move(this->max_width, this->cfg.speed);});
+  this->control_thread = std::thread([&](){
+    this->is_moving = true;
+    this->gripper.move(this->max_width, this->cfg.speed);
+    this->is_moving = false;
+  });
 }
 void FrankaHand::shut() {
   this->last_commanded_width = 0;
   if (!this->cfg.async_control) {
+    this->is_moving = true;
     this->gripper.move(0, this->cfg.speed);
+    this->is_moving = false;
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){this->gripper.move(0, this->cfg.speed);});
+  this->control_thread = std::thread([&](){
+    this->is_moving = true;
+    this->gripper.move(0, this->cfg.speed);
+    this->is_moving = false;
+  });
 }
 }  // namespace hw
 }  // namespace rcs
\ No newline at end of file
diff --git a/src/hw/FrankaHand.h b/src/hw/FrankaHand.h
index 998ccc4b..7f1cbb31 100644
--- a/src/hw/FrankaHand.h
+++ b/src/hw/FrankaHand.h
@@ -30,10 +30,13 @@ struct FHConfig : common::GConfig {
 
 struct FHState : common::GState {
   double width;
+  // true is open
+  bool bool_state;
   bool is_grasped;
   uint16_t temperature;
   double last_commanded_width;
   double max_unnormalized_width;
+  bool is_moving;
 };
 
 class FrankaHand : public common::Gripper {
@@ -42,6 +45,7 @@ class FrankaHand : public common::Gripper {
   FHConfig cfg;
   double max_width;
   double last_commanded_width;
+  bool is_moving = false;
   std::optional<std::thread> control_thread = std::nullopt;
   void m_reset();
   void m_stop();
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index 5554c6ae..d6298a5a 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -343,6 +343,8 @@ PYBIND11_MODULE(_core, m) {
       .def(py::init<>())
       .def_readonly("width", &rcs::hw::FHState::width)
       .def_readonly("is_grasped", &rcs::hw::FHState::is_grasped)
+      .def_readonly("is_moving", &rcs::hw::FHState::is_moving)
+      .def_readonly("bool_state", &rcs::hw::FHState::bool_state)
       .def_readonly("last_commanded_width",
                     &rcs::hw::FHState::last_commanded_width)
       .def_readonly("max_unnormalized_width",

From 2eec258557a044dd9f73eaecb591ff43833af964 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 13:10:38 +0200
Subject: [PATCH 32/56] feat(wrappers): storage wrapper delays action

---
 python/rcsss/envs/wrappers.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 635b1826..4d4c9c22 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -39,6 +39,7 @@ def __init__(
         self.record_numpy = record_numpy
         self.gif = gif
         self.camera_set = camera_set
+        self.prev_obs: dict | None = None
 
         # make folders
         self.path = Path(path) / self.FOLDER.format(self.timestamp)
@@ -102,7 +103,9 @@ def flush(self):
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
         # write obs and action into data
-        act_obs = {"action": action, "observation": obs, "timestamp": datetime.now().timestamp()}
+        act_obs = {"action": action, "observation": self.prev_obs, "timestamp": datetime.now().timestamp()}
+        # delay observation by one time step to ensure that the observation leads to the action (and not like in gym env)
+        self.prev_obs = obs
         act_obs["timestamp"] = datetime.now().timestamp()
         self.data["language_instruction"] = self.language_instruction
         for key, value in act_obs.items():
@@ -116,7 +119,9 @@ def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str,
     def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
         self.flush()
         self.step_count = 0
+        self.prev_obs = None
         re = super().reset(seed=seed, options=options)
+        self.prev_obs = re[0]
         return re
 
     def close(self):

From 8efb959d47285e0311d6e727dcf2e38d88660db1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 13:19:54 +0200
Subject: [PATCH 33/56] refactor(storage wrapper)!: flag removed to simplify
 interface

The record_numpy flag has been removed to simplify the storage recorder interface and prepare it for other non numpy recorders.
---
 python/rcsss/control/run_vla.py | 2 +-
 python/rcsss/envs/wrappers.py   | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/python/rcsss/control/run_vla.py b/python/rcsss/control/run_vla.py
index 22ac6a38..943be21a 100644
--- a/python/rcsss/control/run_vla.py
+++ b/python/rcsss/control/run_vla.py
@@ -189,7 +189,7 @@ def main():
             )
             env.get_wrapper_attr("sim").open_gui()
         if not DEBUG:
-            env = StorageWrapper(env, path="octo_realv1_async", instruction=INSTRUCTION, record_numpy=False)
+            env = StorageWrapper(env, path="octo_realv1_async", instruction=INSTRUCTION)
 
             # record videos
             video_path = env.path / "videos"
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 4d4c9c22..ed208dbc 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -27,7 +27,6 @@ def __init__(
         path: str,
         instruction: str | None = None,
         description: str | None = None,
-        record_numpy: bool = True,
         gif: bool = True,
         camera_set: BaseHardwareCameraSet | None = None,
     ):
@@ -36,7 +35,6 @@ def __init__(
         self.step_count = 0
         self.data = {}
         self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-        self.record_numpy = record_numpy
         self.gif = gif
         self.camera_set = camera_set
         self.prev_obs: dict | None = None
@@ -70,8 +68,7 @@ def flush(self):
         if len(self.data) == 0 or self.step_count == 0:
             return
         print(self.data.keys())
-        if self.record_numpy:
-            np.savez(self.path / self.FILE.format(self.episode_count), **self.data)
+        np.savez(self.path / self.FILE.format(self.episode_count), **self.data)
         if self.camera_set is not None and self.camera_set.recording_ongoing():
             self.camera_set.stop_video()
 

From 12f5faac922288978b42f933e8c9a042a02b85a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 13:22:51 +0200
Subject: [PATCH 34/56] feat(wrapper)!: added hdf5 recorder wrapper

---
 python/rcsss/envs/wrappers.py | 206 +++++++++++++++++++++++++++++++++-
 1 file changed, 204 insertions(+), 2 deletions(-)

diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index ed208dbc..45ba6053 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -1,4 +1,3 @@
-import copy
 from datetime import datetime
 import os
 from pathlib import Path
@@ -13,8 +12,11 @@
 from PIL import Image
 from rcsss.camera.hw import BaseHardwareCameraSet
 
+import subprocess
+import h5py
 
-class StorageWrapper(gym.Wrapper):
+
+class StorageWrapperNumpy(gym.Wrapper):
     # TODO: this should also record the instruction
     FILE = "episode_{}.npz"
     GIF = "{}_episode_{}_{}.gif"
@@ -135,6 +137,206 @@ def log_files(self, file2content: dict[str, str]):
                 f.write(content)
 
 
+# TODO: gifs should not be created after each episode, but there should rather be tool
+# to create them from a dataset, how about video?
+class StorageWrapperHDF5(gym.Wrapper):
+    FILE = "data.h5"
+    GIF = "{}_{}.gif"
+    FOLDER = "experiment_{}"
+    GIF_DURATION_S = 0.5
+
+    def __init__(
+        self,
+        env: gym.Env,
+        path: str,
+        instruction: str | None = None,
+        description: str | None = None,
+        gif: bool = True,
+        camera_set: BaseHardwareCameraSet | None = None,
+    ):
+        super().__init__(env)
+        self.episode_count = 0
+        self.step_count = 0
+        self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+        self.gif = gif
+        self.prev_obs: dict | None = None
+        self.datasets = {}
+        self.camera_set = camera_set
+
+        # Make folders
+        self.path = Path(path)  # / self.FOLDER.format(self.timestamp)
+        Path(self.path).mkdir(parents=True, exist_ok=True)
+        if description is None:
+            # Write a small description from input into file
+            description = input("Please enter a description for this experiment: ")
+        self.description = description
+
+        # with open(self.path / "description.txt", "w") as f:
+        #     f.write(self.description)
+
+        if instruction is None:
+            # Write instruction from input into file
+            instruction = input("Instruction: ")
+        self.language_instruction = str(instruction)
+        # Open HDF5 file in append mode
+        self.h5file = h5py.File(self.path / self.FILE, "a")
+        # Check if instruction group exists
+        if self.language_instruction in self.h5file:
+            self.instruction_group = self.h5file[self.language_instruction]
+        else:
+            self.instruction_group = self.h5file.create_group(self.language_instruction)
+
+        self.gif_path = self.path / "gifs"
+        if self.gif:
+            self.gif_path.mkdir(parents=True, exist_ok=True)
+
+    def append_to_hdf5(self, group, data_dict, index):
+        for key, value in data_dict.items():
+            if isinstance(value, dict):
+                # Handle subgroup
+                if key not in group:
+                    subgroup = group.create_group(key)
+                else:
+                    subgroup = group[key]
+                self.append_to_hdf5(subgroup, value, index)
+            else:
+                # Handle dataset
+                dataset_name = key
+                full_dataset_path = group.name + "/" + dataset_name
+                if full_dataset_path not in self.datasets:
+                    # First time seeing this dataset
+                    # Determine dtype
+                    if isinstance(value, str):
+                        # Variable-length string
+                        dtype = h5py.string_dtype(encoding="utf-8")
+                        shape = ()
+                    elif np.isscalar(value):
+                        # Numeric scalar
+                        dtype = type(value)
+                        shape = ()
+                    elif isinstance(value, np.ndarray):
+                        # Numpy array
+                        dtype = value.dtype
+                        shape = value.shape
+                    else:
+                        # Other types, try to convert to numpy array
+                        try:
+                            value = np.array(value)
+                            dtype = value.dtype
+                            shape = value.shape
+                        except Exception as e:
+                            raise ValueError(f"Unsupported data type for key '{key}': {type(value)}") from e
+                    # Create dataset
+                    initial_shape = (index + 1,) + shape
+                    maxshape = (None,) + shape
+                    dataset = group.create_dataset(
+                        dataset_name, shape=initial_shape, maxshape=maxshape, chunks=True, dtype=dtype
+                    )
+                    self.datasets[full_dataset_path] = dataset
+                else:
+                    dataset = self.datasets[full_dataset_path]
+                    if dataset.shape[0] <= index:
+                        new_size = index + 1
+                        dataset.resize(new_size, axis=0)
+                # Store value
+                if isinstance(value, str):
+                    dataset[index] = value
+                elif np.isscalar(value):
+                    dataset[index] = value
+                else:
+                    dataset[index, ...] = value
+
+    def flush(self):
+        """Writes data to disk and generates GIFs if enabled."""
+        if self.step_count == 0:
+            return
+        # Flush HDF5 file
+        self.h5file.flush()
+        # Stop camera recording if applicable
+        if self.camera_set is not None and self.camera_set.recording_ongoing():
+            self.camera_set.stop_video()
+        # Generate GIFs if enabled
+        if self.gif:
+            for key in ["side", "right_side", "bird_eye", "left_side", "front"]:
+                img_dataset_path = f"observation/frames/{key}/rgb"
+                if img_dataset_path in self.episode_group:
+                    dataset = self.episode_group[img_dataset_path]
+                    imgs = []
+                    previous_timestamp = 0
+                    timestamp_dataset = self.episode_group["timestamp"]
+                    for idx in range(min(len(dataset), len(timestamp_dataset))):
+                        # Skip images that have timestamps closer together than self.GIF_DURATION_S
+                        img = dataset[idx]
+                        timestamp = timestamp_dataset[idx]
+                        if timestamp - previous_timestamp < self.GIF_DURATION_S:
+                            continue
+                        previous_timestamp = timestamp
+                        imgs.append(Image.fromarray(img))
+                    if imgs:
+                        imgs[0].save(
+                            self.gif_path / self.GIF.format(self.timestamp, key),
+                            save_all=True,
+                            append_images=imgs[1:],
+                            duration=self.GIF_DURATION_S * 1000,
+                            loop=0,
+                        )
+        # Reset datasets for the next episode
+        self.datasets = {}
+        self.episode_count += 1
+
+    def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
+        obs, reward, terminated, truncated, info = super().step(action)
+        # Delay observation by one time step
+        act_obs = {"action": action, "observation": self.prev_obs, "timestamp": datetime.now().timestamp()}
+        self.prev_obs = obs  # Update prev_obs for next step
+        # Append data to HDF5
+        self.append_to_hdf5(self.episode_group, act_obs, self.step_count)
+        self.step_count += 1
+        return obs, reward, terminated, truncated, info
+
+    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
+        self.flush()
+        self.step_count = 0
+        self.prev_obs = None
+        # Create a new episode group
+        episode_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f")
+        self.episode_group = self.instruction_group.create_group(episode_name)
+        self.datasets = {}
+        # Get git metadata
+        try:
+            git_diff = subprocess.check_output(["git", "diff", "--submodule=diff"]).decode("utf-8")
+            git_commit_id = subprocess.check_output(["git", "log", "--format=%H", "-n", "1"]).decode("utf-8").strip()
+            git_submodule_status = subprocess.check_output(["git", "submodule", "status"]).decode("utf-8")
+        except Exception as e:
+            git_diff = ""
+            git_commit_id = ""
+            git_submodule_status = ""
+        # Store git info as attributes
+        self.episode_group.attrs["git_diff"] = git_diff
+        self.episode_group.attrs["git_commit_id"] = git_commit_id
+        self.episode_group.attrs["git_submodule_status"] = git_submodule_status
+        # Also store description and language instruction
+        self.episode_group.attrs["description"] = self.description
+        self.episode_group.attrs["language_instruction"] = self.language_instruction
+        result = super().reset(seed=seed, options=options)
+        self.prev_obs = result[0]  # Initialize prev_obs
+        return result
+
+    def close(self):
+        self.flush()
+        self.h5file.close()
+        return super().close()
+
+    @property
+    def logger_dir(self):
+        return self.path
+
+    def log_files(self, file2content: dict[str, str]):
+        for fn, content in file2content.items():
+            with open(self.path / fn, "w") as f:
+                f.write(content)
+
+
 def listdict2dictlist(LD):
     return {k: [dic[k] for dic in LD] for k in LD[0]}
 

From 18079778b00f14ba89e7cc313ec095ccba2699e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 14:45:22 +0200
Subject: [PATCH 35/56] refactor(recorder wrapper): moved gif generation into
 own function

---
 python/rcsss/envs/wrappers.py | 161 +++++++++++++++++++++++-----------
 1 file changed, 108 insertions(+), 53 deletions(-)

diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 45ba6053..4cce48af 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -19,9 +19,7 @@
 class StorageWrapperNumpy(gym.Wrapper):
     # TODO: this should also record the instruction
     FILE = "episode_{}.npz"
-    GIF = "{}_episode_{}_{}.gif"
     FOLDER = "experiment_{}"
-    GIF_DURATION_S = 0.5
 
     def __init__(
         self,
@@ -74,31 +72,44 @@ def flush(self):
         if self.camera_set is not None and self.camera_set.recording_ongoing():
             self.camera_set.stop_video()
 
-        if self.gif:
-            # for key in ["side", "wrist", "bird_eye", "openvla_view"]:
-            for key in ["side", "right_side", "bird_eye", "left_side", "front"]:
-                if f"observation.frames.{key}.rgb" in self.data:
-                    imgs = []
-                    previous_timestamp = 0
-                    for idx in range(min(len(self.data[f"observation.frames.{key}.rgb"]), len(self.data["timestamp"]))):
-                        # skip images that have timestamps closer together than 0.5s
-                        img = self.data[f"observation.frames.{key}.rgb"][idx]
-                        if self.data["timestamp"][idx] - previous_timestamp < self.GIF_DURATION_S:
-                            continue
-                        previous_timestamp = self.data["timestamp"][idx]
-                        imgs.append(Image.fromarray(img))
-                    imgs[0].save(
-                        self.gif_path / self.GIF.format(self.timestamp, self.episode_count, key),
-                        save_all=True,
-                        append_images=imgs[1:],
-                        duration=self.GIF_DURATION_S * 1000,
-                        loop=0,
-                    )
-
         self.episode_count += 1
         self.data = {}
 
-    # TODO: fix recorder order
+    @staticmethod
+    def create_gif(path: str, episode: int, key: str, frame_interval_s: float = 0.5):
+        data = np.load(path)
+        gif_path = Path(path) / "gifs"
+        gif_path.mkdir(parents=True, exist_ok=True)
+
+        if (
+            "observation" in data
+            and "frames" in data["observation"]
+            and key in data["observation"]["frames"]
+            and "rgb" in data["observation"]["frames"][key]
+        ):
+            imgs = []
+            previous_timestamp = 0
+            d = data["observation"]["frames"][key]["rgb"]
+            for idx in range(min(len(d), len(data["timestamp"]))):
+                # skip images that have timestamps closer together than 0.5s
+                img = d[idx]
+                if data["timestamp"][idx] - previous_timestamp < frame_interval_s:
+                    continue
+                previous_timestamp = data["timestamp"][idx]
+                imgs.append(Image.fromarray(img))
+            imgs[0].save(
+                gif_path / "episode_{}_{}.gif".format(episode, key),
+                save_all=True,
+                append_images=imgs[1:],
+                duration=frame_interval_s * 1000,
+                loop=0,
+            )
+        else:
+            if "observation" in data and "frames" in data["observation"] and key in data["observation"]["frames"]:
+                raise ValueError(f"Key {key} not found in {data['observation']['frames'].keys()}")
+            else:
+                raise ValueError(f"Key {key} not found in data")
+
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
         # write obs and action into data
@@ -141,9 +152,6 @@ def log_files(self, file2content: dict[str, str]):
 # to create them from a dataset, how about video?
 class StorageWrapperHDF5(gym.Wrapper):
     FILE = "data.h5"
-    GIF = "{}_{}.gif"
-    FOLDER = "experiment_{}"
-    GIF_DURATION_S = 0.5
 
     def __init__(
         self,
@@ -164,7 +172,7 @@ def __init__(
         self.camera_set = camera_set
 
         # Make folders
-        self.path = Path(path)  # / self.FOLDER.format(self.timestamp)
+        self.path = Path(path)
         Path(self.path).mkdir(parents=True, exist_ok=True)
         if description is None:
             # Write a small description from input into file
@@ -255,35 +263,82 @@ def flush(self):
         # Stop camera recording if applicable
         if self.camera_set is not None and self.camera_set.recording_ongoing():
             self.camera_set.stop_video()
-        # Generate GIFs if enabled
-        if self.gif:
-            for key in ["side", "right_side", "bird_eye", "left_side", "front"]:
-                img_dataset_path = f"observation/frames/{key}/rgb"
-                if img_dataset_path in self.episode_group:
-                    dataset = self.episode_group[img_dataset_path]
-                    imgs = []
-                    previous_timestamp = 0
-                    timestamp_dataset = self.episode_group["timestamp"]
-                    for idx in range(min(len(dataset), len(timestamp_dataset))):
-                        # Skip images that have timestamps closer together than self.GIF_DURATION_S
-                        img = dataset[idx]
-                        timestamp = timestamp_dataset[idx]
-                        if timestamp - previous_timestamp < self.GIF_DURATION_S:
-                            continue
-                        previous_timestamp = timestamp
-                        imgs.append(Image.fromarray(img))
-                    if imgs:
-                        imgs[0].save(
-                            self.gif_path / self.GIF.format(self.timestamp, key),
-                            save_all=True,
-                            append_images=imgs[1:],
-                            duration=self.GIF_DURATION_S * 1000,
-                            loop=0,
-                        )
         # Reset datasets for the next episode
         self.datasets = {}
         self.episode_count += 1
 
+    @staticmethod
+    def create_gif(
+        h5file_path: str,
+        language_instruction: str,
+        episode_name: str,
+        key: str,
+        frame_interval_s: float = 0.5,
+        output_folder: str = None,
+    ):
+        """
+        Create a GIF from images stored in the HDF5 file.
+
+        Args:
+            h5file_path (str): Path to the HDF5 file.
+            language_instruction (str): The instruction group under which the episode is stored.
+            episode_name (str): The name of the episode group.
+            key (str): The camera key (e.g., 'front', 'side', etc.).
+            frame_interval_s (float, optional): Minimum time between frames to include in the GIF. Defaults to 0.5 seconds.
+            output_folder (str, optional): Path to the directory to save the GIF. Defaults to the 'gifs' folder in the HDF5 file's directory.
+        """
+
+        with h5py.File(h5file_path, "r") as h5file:
+            if language_instruction in h5file:
+                instruction_group = h5file[language_instruction]
+            else:
+                raise ValueError(f"Language instruction '{language_instruction}' not found in HDF5 file.")
+
+            if episode_name in instruction_group:
+                episode_group = instruction_group[episode_name]
+            else:
+                raise ValueError(f"Episode '{episode_name}' not found under instruction '{language_instruction}'.")
+
+            img_dataset_path = f"observation/frames/{key}/rgb"
+            if img_dataset_path in episode_group:
+                images_dataset = episode_group[img_dataset_path]
+            else:
+                raise ValueError(f"Image dataset '{img_dataset_path}' not found in episode '{episode_name}'.")
+
+            if "timestamp" in episode_group:
+                timestamps = episode_group["timestamp"][:]
+            else:
+                raise ValueError(f"'timestamp' dataset not found in episode '{episode_name}'.")
+
+            imgs = []
+            previous_timestamp = None  # Initialize as None
+            for idx in range(min(len(images_dataset), len(timestamps))):
+                timestamp = timestamps[idx]
+                if previous_timestamp is not None and (timestamp - previous_timestamp) < frame_interval_s:
+                    continue
+                previous_timestamp = timestamp
+                img_array = images_dataset[idx]
+                img = Image.fromarray(img_array)
+                imgs.append(img)
+
+            if not imgs:
+                raise ValueError(f"No images found to create GIF for key '{key}' in episode '{episode_name}'.")
+
+            if output_folder is None:
+                output_folder = os.path.join(os.path.dirname(h5file_path), "gifs")
+            os.makedirs(output_folder, exist_ok=True)
+
+            gif_filename = f"{language_instruction}_{episode_name}_{key}.gif"
+            gif_filepath = os.path.join(output_folder, gif_filename)
+
+            imgs[0].save(
+                gif_filepath,
+                save_all=True,
+                append_images=imgs[1:],
+                duration=frame_interval_s * 1000,  # Duration per frame in milliseconds
+                loop=0,
+            )
+
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
         # Delay observation by one time step

From 8ab41a7350aecfc60c51e300c1ac9a473082941c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 14:48:06 +0200
Subject: [PATCH 36/56] feat: frame rate logger

---
 python/rcsss/camera/interface.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/python/rcsss/camera/interface.py b/python/rcsss/camera/interface.py
index 93602580..79ea2e6d 100644
--- a/python/rcsss/camera/interface.py
+++ b/python/rcsss/camera/interface.py
@@ -1,14 +1,18 @@
 from dataclasses import dataclass
 from datetime import datetime
+import logging
 from time import time, sleep
 from typing import Any, Protocol
 
 from pydantic import BaseModel, Field
 
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 class SimpleFrameRate:
     def __init__(self):
         self.t = None
+        self._last_print = None
 
     def reset(self):
         self.t = None
@@ -16,6 +20,7 @@ def reset(self):
     def __call__(self, frame_rate: int | float):
         if self.t is None:
             self.t = time()
+            self._last_print = self.t
             sleep(1 / frame_rate if isinstance(frame_rate, int) else frame_rate)
             return
         sleep_time = (
@@ -23,6 +28,10 @@ def __call__(self, frame_rate: int | float):
         )
         if sleep_time > 0:
             sleep(sleep_time)
+        if self._last_print is None or time() - self._last_print > 30:
+            self._last_print = time()
+            logger.info(f"FPS: {1 / (time() - self.t)}")
+
         self.t = time()
 
 

From 10c3bbb0f15701141806b703e62757e244eb2091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Mon, 31 Mar 2025 15:10:18 +0200
Subject: [PATCH 37/56] feat(wrapper recorder): hdf5 gzip compression

---
 pyproject.toml                | 1 +
 python/rcsss/envs/wrappers.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b41104b9..afde7fa0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ dependencies = ["websockets>=11.0",
     "pillow~=10.3",
     "python-dotenv==1.0.1",
     "opencv-python~=4.10.0.84",
+    "h5py~=3.13.0",
     # NOTE: when changing the mujoco version, also change it in requirements_dev.txt
     "mujoco==3.2.6",
     # NOTE: Same for pinocchio (pin)
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 4cce48af..c1b84eff 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -238,7 +238,7 @@ def append_to_hdf5(self, group, data_dict, index):
                     initial_shape = (index + 1,) + shape
                     maxshape = (None,) + shape
                     dataset = group.create_dataset(
-                        dataset_name, shape=initial_shape, maxshape=maxshape, chunks=True, dtype=dtype
+                        dataset_name, shape=initial_shape, maxshape=maxshape, chunks=True, dtype=dtype , compression="gzip"
                     )
                     self.datasets[full_dataset_path] = dataset
                 else:

From 9a8c5acb03bea8e21748925a26087ab9c3c3dde0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 2 Apr 2025 08:21:10 +0200
Subject: [PATCH 38/56] fix(hw gripper): exception handling for franka hand,
 improved roboustness

---
 src/hw/FrankaHand.cpp | 50 ++++++++++++++++++++++++++++++++++---------
 src/hw/FrankaHand.h   |  2 ++
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/hw/FrankaHand.cpp b/src/hw/FrankaHand.cpp
index 8d8fcd13..0ef42013 100644
--- a/src/hw/FrankaHand.cpp
+++ b/src/hw/FrankaHand.cpp
@@ -1,9 +1,11 @@
 #include "FrankaHand.h"
 
+#include <franka/exception.h>
 #include <franka/gripper.h>
 
 #include <Eigen/Core>
 #include <cmath>
+#include <iostream>
 #include <string>
 #include <tuple>
 
@@ -62,7 +64,7 @@ void FrankaHand::set_normalized_width(double width, double force) {
     this->gripper.move(width, this->cfg.speed);
   } else {
     this->gripper.grasp(width, this->cfg.speed, force, this->cfg.epsilon_inner,
-                  this->cfg.epsilon_outer);
+                        this->cfg.epsilon_outer);
   }
 }
 double FrankaHand::get_normalized_width() {
@@ -70,13 +72,21 @@ double FrankaHand::get_normalized_width() {
   return gripper_state.width / gripper_state.max_width;
 }
 
-void FrankaHand::m_stop(){
-  this->gripper.stop();
+void FrankaHand::m_stop() {
+  try {
+    this->gripper.stop();
+  } catch (const franka::CommandException &e) {
+    std::cerr << "franka hand command exception ignored stop" << std::endl;
+  }
+  this->m_wait();
+  this->is_moving = false;
+}
+
+void FrankaHand::m_wait() {
   if (this->control_thread.has_value()) {
     this->control_thread->join();
     this->control_thread.reset();
   }
-  this->is_moving = false;
 }
 
 void FrankaHand::m_reset() {
@@ -100,12 +110,15 @@ bool FrankaHand::homing() {
   // Do a homing in order to estimate the maximum
   // grasping width with the current fingers.
   this->is_moving = true;
-  bool success =  this->gripper.homing();
+  bool success = this->gripper.homing();
   this->is_moving = false;
   return success;
 }
 
 void FrankaHand::grasp() {
+  if (this->is_moving || this->last_commanded_width == 0) {
+    return;
+  }
   this->last_commanded_width = 0;
   if (!this->cfg.async_control) {
     this->is_moving = true;
@@ -114,14 +127,21 @@ void FrankaHand::grasp() {
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){
+  this->control_thread = std::thread([&]() {
     this->is_moving = true;
-    this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+    try {
+      this->gripper.grasp(0, this->cfg.speed, this->cfg.force, 1, 1);
+    } catch (const franka::CommandException &e) {
+      std::cerr << "franka hand command exception ignored grasp" << std::endl;
+    }
     this->is_moving = false;
   });
 }
 
 void FrankaHand::open() {
+  if (this->is_moving || this->last_commanded_width == this->max_width) {
+    return;
+  }
   this->last_commanded_width = this->max_width;
   if (!this->cfg.async_control) {
     this->is_moving = true;
@@ -130,13 +150,23 @@ void FrankaHand::open() {
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){
+  this->control_thread = std::thread([&]() {
     this->is_moving = true;
-    this->gripper.move(this->max_width, this->cfg.speed);
+    try {
+      // perhaps we should use graps here
+      this->gripper.move(this->max_width, this->cfg.speed);
+      // this->gripper.grasp(this->max_width, this->cfg.speed, this->cfg.force,
+      // 1, 1);
+    } catch (const franka::CommandException &e) {
+      std::cerr << "franka hand command exception ignored open" << std::endl;
+    }
     this->is_moving = false;
   });
 }
 void FrankaHand::shut() {
+  if (this->is_moving || this->last_commanded_width == 0) {
+    return;
+  }
   this->last_commanded_width = 0;
   if (!this->cfg.async_control) {
     this->is_moving = true;
@@ -145,7 +175,7 @@ void FrankaHand::shut() {
     return;
   }
   this->m_stop();
-  this->control_thread = std::thread([&](){
+  this->control_thread = std::thread([&]() {
     this->is_moving = true;
     this->gripper.move(0, this->cfg.speed);
     this->is_moving = false;
diff --git a/src/hw/FrankaHand.h b/src/hw/FrankaHand.h
index 7f1cbb31..1ddf40a3 100644
--- a/src/hw/FrankaHand.h
+++ b/src/hw/FrankaHand.h
@@ -45,10 +45,12 @@ class FrankaHand : public common::Gripper {
   FHConfig cfg;
   double max_width;
   double last_commanded_width;
+  // TODO: might be better if is_moving is a lock
   bool is_moving = false;
   std::optional<std::thread> control_thread = std::nullopt;
   void m_reset();
   void m_stop();
+  void m_wait();
 
  public:
   FrankaHand(const std::string &ip, const FHConfig &cfg);

From cad65acd6792a7e16f6f0d2e0d124118f2418744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 2 Apr 2025 08:21:45 +0200
Subject: [PATCH 39/56] format(cpp)

---
 src/common/LinearPoseTrajInterpolator.h | 18 ++++---
 src/hw/FR3.cpp                          | 67 +++++++++++++------------
 src/hw/FR3.h                            |  5 +-
 src/pybind/rcsss.cpp                    |  4 +-
 4 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/src/common/LinearPoseTrajInterpolator.h b/src/common/LinearPoseTrajInterpolator.h
index 03a3d0f8..65a0b848 100644
--- a/src/common/LinearPoseTrajInterpolator.h
+++ b/src/common/LinearPoseTrajInterpolator.h
@@ -31,9 +31,9 @@ class LinearPoseTrajInterpolator {
         max_time_(1.),
         start_time_(0.),
         start_(false),
-        first_goal_(true) {};
+        first_goal_(true){};
 
-  inline ~LinearPoseTrajInterpolator() {};
+  inline ~LinearPoseTrajInterpolator(){};
 
   inline void Reset(const double &time_sec, const Eigen::Vector3d &p_start,
                     const Eigen::Quaterniond &q_start,
@@ -96,7 +96,7 @@ class LinearPoseTrajInterpolator {
   };
 };
 class LinearJointPositionTrajInterpolator {
-private:
+ private:
   using Vector7d = Eigen::Matrix<double, 7, 1>;
   using Vector7i = Eigen::Matrix<int, 7, 1>;
 
@@ -113,12 +113,16 @@ class LinearJointPositionTrajInterpolator {
   bool start_;
   bool first_goal_;
 
-  double interpolation_fraction_; // fraction of actual interpolation within an
-                                  // interval
+  double interpolation_fraction_;  // fraction of actual interpolation within an
+                                   // interval
 
-public:
+ public:
   inline LinearJointPositionTrajInterpolator()
-      : dt_(0.), last_time_(0.), max_time_(1.), start_time_(0.), start_(false),
+      : dt_(0.),
+        last_time_(0.),
+        max_time_(1.),
+        start_time_(0.),
+        start_(false),
         first_goal_(true){};
 
   inline ~LinearJointPositionTrajInterpolator(){};
diff --git a/src/hw/FR3.cpp b/src/hw/FR3.cpp
index 28be62e6..de061021 100644
--- a/src/hw/FR3.cpp
+++ b/src/hw/FR3.cpp
@@ -87,7 +87,7 @@ void FR3::set_default_robot_behavior() {
 
 common::Pose FR3::get_cartesian_position() {
   common::Pose x;
-  if (this->running_controller == Controller::none){
+  if (this->running_controller == Controller::none) {
     this->curr_state = this->robot.readOnce();
     x = common::Pose(this->curr_state.O_T_EE);
   } else {
@@ -99,7 +99,7 @@ common::Pose FR3::get_cartesian_position() {
 }
 
 void FR3::set_joint_position(const common::Vector7d &q) {
-  if (this->cfg.async_control){
+  if (this->cfg.async_control) {
     this->controller_set_joint_position(q);
     return;
   }
@@ -121,9 +121,8 @@ common::Vector7d FR3::get_joint_position() {
   return joints;
 }
 
-void FR3::set_guiding_mode(bool x, bool y, bool z,
-                           bool roll, bool pitch, bool yaw,
-                           bool elbow) {
+void FR3::set_guiding_mode(bool x, bool y, bool z, bool roll, bool pitch,
+                           bool yaw, bool elbow) {
   std::array<bool, 6> activated = {x, y, z, roll, pitch, yaw};
   this->robot.setGuidingMode(activated, elbow);
 }
@@ -160,7 +159,7 @@ void TorqueSafetyGuardFn(std::array<double, 7> &tau_d_array, double min_torque,
 
 void FR3::controller_set_joint_position(const common::Vector7d &desired_q) {
   // from deoxys/config/osc-position-controller.yml
-  double traj_interpolation_time_fraction = 1.0; // in s
+  double traj_interpolation_time_fraction = 1.0;  // in s
   // form deoxys/config/charmander.yml
   int policy_rate = 20;
   int traj_rate = 500;
@@ -170,8 +169,10 @@ void FR3::controller_set_joint_position(const common::Vector7d &desired_q) {
     this->get_joint_position();
   } else if (this->running_controller != Controller::jsc) {
     // runtime error
-    throw std::runtime_error("Controller type must but joint space but is " +
-                             std::to_string(this->running_controller) + ". To change controller type stop the current controller first.");
+    throw std::runtime_error(
+        "Controller type must but joint space but is " +
+        std::to_string(this->running_controller) +
+        ". To change controller type stop the current controller first.");
   } else {
     this->interpolator_mutex.lock();
   }
@@ -183,7 +184,7 @@ void FR3::controller_set_joint_position(const common::Vector7d &desired_q) {
 
   // if not thread is running, then start
   if (this->running_controller == Controller::none) {
-    this->running_controller= Controller::jsc;
+    this->running_controller = Controller::jsc;
     this->control_thread = std::thread(&FR3::joint_controller, this);
   } else {
     this->interpolator_mutex.unlock();
@@ -202,8 +203,10 @@ void FR3::osc_set_cartesian_position(
     this->controller_time = 0.0;
     this->get_cartesian_position();
   } else if (this->running_controller != Controller::osc) {
-    throw std::runtime_error("Controller type must but osc but is " +
-                             std::to_string(this->running_controller) + ". To change controller type stop the current controller first.");
+    throw std::runtime_error(
+        "Controller type must but osc but is " +
+        std::to_string(this->running_controller) +
+        ". To change controller type stop the current controller first.");
   } else {
     this->interpolator_mutex.lock();
   }
@@ -217,7 +220,7 @@ void FR3::osc_set_cartesian_position(
 
   // if not thread is running, then start
   if (this->running_controller == Controller::none) {
-    this->running_controller= Controller::osc;
+    this->running_controller = Controller::osc;
     this->control_thread = std::thread(&FR3::osc, this);
   } else {
     this->interpolator_mutex.unlock();
@@ -226,7 +229,8 @@ void FR3::osc_set_cartesian_position(
 
 // method to stop thread
 void FR3::stop_control_thread() {
-  if (this->control_thread.has_value() && this->running_controller != Controller::none) {
+  if (this->control_thread.has_value() &&
+      this->running_controller != Controller::none) {
     this->running_controller = Controller::none;
     this->control_thread->join();
     this->control_thread.reset();
@@ -242,11 +246,11 @@ void FR3::osc() {
   this->set_default_robot_behavior();
 
   // high collision threshold values for high impedance
-  // this->robot.setCollisionBehavior(
-  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
-  //     {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
+  this->robot.setCollisionBehavior(
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
+      {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0}});
 
   // from bench mark
   // ([150.0, 150.0, 60.0], 250.0), // kp_translation, kp_rotation
@@ -546,18 +550,17 @@ void FR3::joint_controller() {
   });
 }
 
-void FR3::zero_torque_guiding(){
-
+void FR3::zero_torque_guiding() {
   if (this->running_controller != Controller::none) {
-    throw std::runtime_error("A controller is currently running. Please stop it first.");
+    throw std::runtime_error(
+        "A controller is currently running. Please stop it first.");
   }
-    this->controller_time = 0.0;
-    this->running_controller= Controller::ztc;
-    this->control_thread = std::thread(&FR3::zero_torque_controller, this);
+  this->controller_time = 0.0;
+  this->running_controller = Controller::ztc;
+  this->control_thread = std::thread(&FR3::zero_torque_controller, this);
 }
 
 void FR3::zero_torque_controller() {
-
   // high collision threshold values for high impedance
   robot.setCollisionBehavior(
       {{100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}},
@@ -580,7 +583,11 @@ void FR3::zero_torque_controller() {
   });
 }
 
-void FR3::move_home() { this->set_joint_position(q_home); }
+void FR3::move_home() {
+  // sync
+  MotionGenerator motion_generator(this->cfg.speed_factor, q_home);
+  this->robot.control(motion_generator);
+}
 
 void FR3::automatic_error_recovery() { this->robot.automaticErrorRecovery(); }
 
@@ -651,7 +658,7 @@ std::optional<std::shared_ptr<common::IK>> FR3::get_ik() { return this->m_ik; }
 
 void FR3::set_cartesian_position(const common::Pose &x) {
   // pose is assumed to be in the robots coordinate frame
-  if (this->cfg.async_control){
+  if (this->cfg.async_control) {
     this->osc_set_cartesian_position(x);
     return;
   }
@@ -663,10 +670,8 @@ void FR3::set_cartesian_position(const common::Pose &x) {
   } else {
     nominal_end_effector_frame_value = common::Pose::Identity();
   }
-  // nominal end effector frame should be on top of tcp offset as franka already takes care of
-  // the default franka hand offset
-  // lets add a franka hand offset
-
+  // nominal end effector frame should be on top of tcp offset as franka already
+  // takes care of the default franka hand offset lets add a franka hand offset
 
   if (this->cfg.ik_solver == IKSolver::franka) {
     // if gripper is attached the tcp offset will automatically be applied
diff --git a/src/hw/FR3.h b/src/hw/FR3.h
index 4f3bc568..dd8991cb 100644
--- a/src/hw/FR3.h
+++ b/src/hw/FR3.h
@@ -83,9 +83,8 @@ class FR3 : public common::Robot {
 
   common::Vector7d get_joint_position() override;
 
-  void set_guiding_mode(bool x, bool y, bool z,
-                           bool roll, bool pitch, bool yaw,
-                           bool elbow);
+  void set_guiding_mode(bool x, bool y, bool z, bool roll, bool pitch, bool yaw,
+                        bool elbow);
 
   void controller_set_joint_position(const common::Vector7d &desired_q);
   void osc_set_cartesian_position(
diff --git a/src/pybind/rcsss.cpp b/src/pybind/rcsss.cpp
index d6298a5a..0d135769 100644
--- a/src/pybind/rcsss.cpp
+++ b/src/pybind/rcsss.cpp
@@ -362,7 +362,9 @@ PYBIND11_MODULE(_core, m) {
       .def("set_default_robot_behavior",
            &rcs::hw::FR3::set_default_robot_behavior)
       .def("set_guiding_mode", &rcs::hw::FR3::set_guiding_mode,
-           py::arg("x") = true, py::arg("y") = true, py::arg("z") = true, py::arg("roll") = true, py::arg("pitch") = true, py::arg("yaw") = true, py::arg("elbow") = true)
+           py::arg("x") = true, py::arg("y") = true, py::arg("z") = true,
+           py::arg("roll") = true, py::arg("pitch") = true,
+           py::arg("yaw") = true, py::arg("elbow") = true)
       .def("zero_torque_guiding", &rcs::hw::FR3::zero_torque_guiding)
       .def("osc_set_cartesian_position",
            &rcs::hw::FR3::osc_set_cartesian_position,

From 1a86063f53a7128a9c01e437467f821039929ac3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 2 Apr 2025 08:23:40 +0200
Subject: [PATCH 40/56] fix(env gripper): remove same action ignore

Same actions lead to instable behavior in franka hand. Filtering out these actions is now already done on a lower level in the C++ code.
---
 python/rcsss/envs/base.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index f398589a..59d058b2 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -608,11 +608,10 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
         gripper_action = np.round(action[self.gripper_key]) if self.binary else action[self.gripper_key]
         gripper_action = np.clip(gripper_action, 0.0, 1.0)
 
-        if self._last_gripper_cmd is None or self._last_gripper_cmd != gripper_action:
-            if self.binary:
-                self._gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self._gripper.open()
-            else:
-                self._gripper.set_normalized_width(gripper_action)
+        if self.binary:
+            self._gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self._gripper.open()
+        else:
+            self._gripper.set_normalized_width(gripper_action)
         self._last_gripper_cmd = gripper_action
         del action[self.gripper_key]
         return action

From 410d29c274d81b2346b22121e45ddf6d9b631c5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 2 Apr 2025 08:27:21 +0200
Subject: [PATCH 41/56] refactor(wrappers): fixed gif and video generation for
 HDF5 data

---
 python/rcsss/camera/hw.py        |   7 +-
 python/rcsss/camera/interface.py |   2 +-
 python/rcsss/control/vive.py     |  14 ++--
 python/rcsss/envs/wrappers.py    | 127 ++++++++++++-------------------
 4 files changed, 59 insertions(+), 91 deletions(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index 3aaa6481..dbe6d687 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -93,13 +93,13 @@ def start(self, warm_up: bool = True):
         self._thread = threading.Thread(target=self.polling_thread, args=(warm_up,))
         self._thread.start()
 
-    def record_video(self, path: Path, episode: int):
+    def record_video(self, path: Path, str_id: str):
         if self.recording_ongoing():
             return
         self.clear_buffer()
         for camera in self.camera_names:
             self.writer[camera] = cv2.VideoWriter(
-                str(path / f"episode_{episode}_{camera}.mp4"),
+                str(path / f"episode_{str_id}_{camera}.mp4"),
                 # migh require to install ffmpeg
                 cv2.VideoWriter_fourcc(*"mp4v"),  # type: ignore
                 self.config.frame_rate,
@@ -107,7 +107,8 @@ def record_video(self, path: Path, episode: int):
             )
 
     def recording_ongoing(self) -> bool:
-        return len(self.writer) > 0
+        with self._buffer_lock:
+            return len(self.writer) > 0
 
     def stop_video(self):
         if len(self.writer) > 0:
diff --git a/python/rcsss/camera/interface.py b/python/rcsss/camera/interface.py
index 79ea2e6d..fad9bdb9 100644
--- a/python/rcsss/camera/interface.py
+++ b/python/rcsss/camera/interface.py
@@ -28,7 +28,7 @@ def __call__(self, frame_rate: int | float):
         )
         if sleep_time > 0:
             sleep(sleep_time)
-        if self._last_print is None or time() - self._last_print > 30:
+        if self._last_print is None or time() - self._last_print > 10:
             self._last_print = time()
             logger.info(f"FPS: {1 / (time() - self.t)}")
 
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 8ac0dce7..7edce7e6 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -30,7 +30,7 @@
     fr3_hw_env,
     fr3_sim_env,
 )
-from rcsss.envs.wrappers import StorageWrapper
+from rcsss.envs.wrappers import StorageWrapperHDF5, StorageWrapperNumpy
 
 
 logger = logging.getLogger(__name__)
@@ -42,16 +42,16 @@
 INCLUDE_ROTATION = True
 ROBOT_IP = "192.168.101.1"
 ROBOT_INSTANCE = RobotInstance.HARDWARE
-DEBUG = False
+DEBUG = True
 STORAGE_PATH = "teleop_data"
 # set camera dict to none disable cameras
 CAMERA_DICT = {
-    "wrist": "244222071045",
+    # "wrist": "244222071045",
     # "wrist": "218622278131", # new realsense
     "bird_eye": "243522070364",
     "side": "243522070385",
 }
-PRODUCE_GIF = True
+# CAMERA_DICT = None
 
 
 
@@ -233,7 +233,7 @@ def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSens
                 if camera_set is not None:
                     video_path = env_rel.path / "videos"
                     video_path.mkdir(parents=True, exist_ok=True)
-                    camera_set.record_video(env_rel.path / "videos", env_rel.episode_count)
+                    camera_set.record_video(env_rel.path / "videos", f"{env_rel.key}_{env_rel.timestamp}_{env_rel.episode_count}")
                 print(f'{env_rel.episode_count = }')
 
                 thread = threading.Thread(target=action_server.environment_step_loop)
@@ -265,7 +265,7 @@ def main():
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,
                 gripper_cfg=default_fr3_hw_gripper_cfg(async_control=True),
-                max_relative_movement=(0.01, np.deg2rad(5)),
+                max_relative_movement=(0.01, np.deg2rad(1)),
                 # max_relative_movement=(0.5, np.deg2rad(90)),
                 # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
@@ -285,7 +285,7 @@ def main():
             env_rel.get_wrapper_attr("sim").open_gui()
 
         if not DEBUG:
-            env_rel = StorageWrapper(env_rel, path=STORAGE_PATH, camera_set=camera_set, gif=PRODUCE_GIF)
+            env_rel = StorageWrapperHDF5(env_rel, path=STORAGE_PATH, camera_set=camera_set)
             # ip_secondary = "192.168.102.1"
             # with Desk.fci(ip_secondary, user, pw):
             #     f = rcsss.hw.FR3(ip_secondary)
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index c1b84eff..7961fad2 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -27,7 +27,6 @@ def __init__(
         path: str,
         instruction: str | None = None,
         description: str | None = None,
-        gif: bool = True,
         camera_set: BaseHardwareCameraSet | None = None,
     ):
         super().__init__(env)
@@ -35,7 +34,6 @@ def __init__(
         self.step_count = 0
         self.data = {}
         self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-        self.gif = gif
         self.camera_set = camera_set
         self.prev_obs: dict | None = None
 
@@ -59,9 +57,6 @@ def __init__(
         # get git diff
         os.system(f'git diff --submodule=diff > {os.path.join(str(self.path), "git_diff.txt")}')
 
-        self.gif_path = Path(path) / "gifs"
-        if self.gif:
-            self.gif_path.mkdir(parents=True, exist_ok=True)
 
     def flush(self):
         """writes data to disk"""
@@ -78,8 +73,6 @@ def flush(self):
     @staticmethod
     def create_gif(path: str, episode: int, key: str, frame_interval_s: float = 0.5):
         data = np.load(path)
-        gif_path = Path(path) / "gifs"
-        gif_path.mkdir(parents=True, exist_ok=True)
 
         if (
             "observation" in data
@@ -87,6 +80,8 @@ def create_gif(path: str, episode: int, key: str, frame_interval_s: float = 0.5)
             and key in data["observation"]["frames"]
             and "rgb" in data["observation"]["frames"][key]
         ):
+            gif_path = Path(path) / "gifs"
+            gif_path.mkdir(parents=True, exist_ok=True)
             imgs = []
             previous_timestamp = 0
             d = data["observation"]["frames"][key]["rgb"]
@@ -116,7 +111,6 @@ def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str,
         act_obs = {"action": action, "observation": self.prev_obs, "timestamp": datetime.now().timestamp()}
         # delay observation by one time step to ensure that the observation leads to the action (and not like in gym env)
         self.prev_obs = obs
-        act_obs["timestamp"] = datetime.now().timestamp()
         self.data["language_instruction"] = self.language_instruction
         for key, value in act_obs.items():
             if key not in self.data:
@@ -159,14 +153,13 @@ def __init__(
         path: str,
         instruction: str | None = None,
         description: str | None = None,
-        gif: bool = True,
+        key: str | None = None,
         camera_set: BaseHardwareCameraSet | None = None,
     ):
         super().__init__(env)
         self.episode_count = 0
         self.step_count = 0
         self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-        self.gif = gif
         self.prev_obs: dict | None = None
         self.datasets = {}
         self.camera_set = camera_set
@@ -186,17 +179,19 @@ def __init__(
             # Write instruction from input into file
             instruction = input("Instruction: ")
         self.language_instruction = str(instruction)
+
+        if key is None:
+            # Write instruction from input into file
+            key = input("key for hdf5: ")
+        self.key = key
         # Open HDF5 file in append mode
         self.h5file = h5py.File(self.path / self.FILE, "a")
         # Check if instruction group exists
-        if self.language_instruction in self.h5file:
-            self.instruction_group = self.h5file[self.language_instruction]
+        if self.key in self.h5file:
+            self.instruction_group = self.h5file[self.key]
         else:
-            self.instruction_group = self.h5file.create_group(self.language_instruction)
+            self.instruction_group = self.h5file.create_group(self.key)
 
-        self.gif_path = self.path / "gifs"
-        if self.gif:
-            self.gif_path.mkdir(parents=True, exist_ok=True)
 
     def append_to_hdf5(self, group, data_dict, index):
         for key, value in data_dict.items():
@@ -255,7 +250,7 @@ def append_to_hdf5(self, group, data_dict, index):
                     dataset[index, ...] = value
 
     def flush(self):
-        """Writes data to disk and generates GIFs if enabled."""
+        """Writes data to disk."""
         if self.step_count == 0:
             return
         # Flush HDF5 file
@@ -270,74 +265,46 @@ def flush(self):
     @staticmethod
     def create_gif(
         h5file_path: str,
-        language_instruction: str,
-        episode_name: str,
         key: str,
+        episode_name: str,
+        camera_id: str,
         frame_interval_s: float = 0.5,
         output_folder: str = None,
     ):
-        """
-        Create a GIF from images stored in the HDF5 file.
-
-        Args:
-            h5file_path (str): Path to the HDF5 file.
-            language_instruction (str): The instruction group under which the episode is stored.
-            episode_name (str): The name of the episode group.
-            key (str): The camera key (e.g., 'front', 'side', etc.).
-            frame_interval_s (float, optional): Minimum time between frames to include in the GIF. Defaults to 0.5 seconds.
-            output_folder (str, optional): Path to the directory to save the GIF. Defaults to the 'gifs' folder in the HDF5 file's directory.
-        """
-
-        with h5py.File(h5file_path, "r") as h5file:
-            if language_instruction in h5file:
-                instruction_group = h5file[language_instruction]
-            else:
-                raise ValueError(f"Language instruction '{language_instruction}' not found in HDF5 file.")
 
-            if episode_name in instruction_group:
-                episode_group = instruction_group[episode_name]
+        with h5py.File(h5file_path, "r") as data:
+            if (
+                key in data
+                and episode_name in data[key]
+                and "observation" in data[key][episode_name]
+                and "frames" in data[key][episode_name]["observation"]
+                and camera_id in data[key][episode_name]["observation"]["frames"]
+                and "rgb" in data[key][episode_name]["observation"]["frames"][camera_id]
+            ):
+                if output_folder is None:
+                    output_folder = os.path.join(os.path.dirname(h5file_path), "gifs")
+                os.makedirs(output_folder, exist_ok=True)
+                gif_filename = f"{key}_{episode_name}_{camera_id}.gif"
+                gif_filepath = os.path.join(output_folder, gif_filename)
+                imgs = []
+                previous_timestamp = 0
+                d = data["observation"]["frames"][camera_id]["rgb"]
+                for idx in range(min(len(d), len(data["timestamp"]))):
+                    # skip images that have timestamps closer together than 0.5s
+                    img = d[idx]
+                    if data["timestamp"][idx] - previous_timestamp < frame_interval_s:
+                        continue
+                    previous_timestamp = data["timestamp"][idx]
+                    imgs.append(Image.fromarray(img))
+                imgs[0].save(
+                    gif_filepath,
+                    save_all=True,
+                    append_images=imgs[1:],
+                    duration=frame_interval_s * 1000,
+                    loop=0,
+                )
             else:
-                raise ValueError(f"Episode '{episode_name}' not found under instruction '{language_instruction}'.")
-
-            img_dataset_path = f"observation/frames/{key}/rgb"
-            if img_dataset_path in episode_group:
-                images_dataset = episode_group[img_dataset_path]
-            else:
-                raise ValueError(f"Image dataset '{img_dataset_path}' not found in episode '{episode_name}'.")
-
-            if "timestamp" in episode_group:
-                timestamps = episode_group["timestamp"][:]
-            else:
-                raise ValueError(f"'timestamp' dataset not found in episode '{episode_name}'.")
-
-            imgs = []
-            previous_timestamp = None  # Initialize as None
-            for idx in range(min(len(images_dataset), len(timestamps))):
-                timestamp = timestamps[idx]
-                if previous_timestamp is not None and (timestamp - previous_timestamp) < frame_interval_s:
-                    continue
-                previous_timestamp = timestamp
-                img_array = images_dataset[idx]
-                img = Image.fromarray(img_array)
-                imgs.append(img)
-
-            if not imgs:
-                raise ValueError(f"No images found to create GIF for key '{key}' in episode '{episode_name}'.")
-
-            if output_folder is None:
-                output_folder = os.path.join(os.path.dirname(h5file_path), "gifs")
-            os.makedirs(output_folder, exist_ok=True)
-
-            gif_filename = f"{language_instruction}_{episode_name}_{key}.gif"
-            gif_filepath = os.path.join(output_folder, gif_filename)
-
-            imgs[0].save(
-                gif_filepath,
-                save_all=True,
-                append_images=imgs[1:],
-                duration=frame_interval_s * 1000,  # Duration per frame in milliseconds
-                loop=0,
-            )
+                raise ValueError(f"camera id {camera_id} not found in data")
 
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
@@ -354,7 +321,7 @@ def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = Non
         self.step_count = 0
         self.prev_obs = None
         # Create a new episode group
-        episode_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f")
+        episode_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
         self.episode_group = self.instruction_group.create_group(episode_name)
         self.datasets = {}
         # Get git metadata

From 42b28d0359d2d3624f6e92fbf89fb2f6bc27ed27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 2 Apr 2025 08:28:01 +0200
Subject: [PATCH 42/56] fix(desk): cache token in case of program crash

---
 python/rcsss/control/fr3_desk.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index 036bc5cc..89769625 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -3,6 +3,7 @@
 import hashlib
 import json as json_module
 import logging
+import os
 import ssl
 import threading
 import time
@@ -298,6 +299,16 @@ def take_control(self, force: bool = False) -> bool:
         """
         active = self._get_active_token()
 
+        # try to read token from cache file
+        token_path = os.path.expanduser('~/.cache/rcs_fr3_token')
+        if active.id != "" and self._token.id == "" and os.path.exists(token_path):
+            with open(token_path, "r") as f:
+                content = f.read()
+            content = content.split("/n")
+            self._token = Token(content[0], content[1], content[2])
+            
+
+
         # we already have control
         if active.id != "" and self._token.id == active.id:
             _logger.info("Retaken control.")
@@ -334,12 +345,14 @@ def take_control(self, force: bool = False) -> bool:
                     if event["circle"]:
                         break
         self._token = Token(str(response["id"]), self._username, response["token"])
+        with open(token_path, "w") as f:
+            f.write("/n".join([self._token.id, self._token.owned_by, self._token.token]))
         _logger.info("Taken control.")
         return True
 
     def release_control(self) -> None:
         """
-        Explicitly relinquish control of the Desk. This will allow
+        Explicitly relinquish cofilentrol of the Desk. This will allow
         other users to take control or transfer control to the next
         user if there is an active queue of control requests.
         """

From 5525f8f4f4b981b45a58ce7aebc46e677cdd9808 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 3 Apr 2025 20:27:28 +0200
Subject: [PATCH 43/56] fix(hw camera): removed double video saving

---
 python/rcsss/camera/hw.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/python/rcsss/camera/hw.py b/python/rcsss/camera/hw.py
index dbe6d687..4c45717e 100644
--- a/python/rcsss/camera/hw.py
+++ b/python/rcsss/camera/hw.py
@@ -113,12 +113,6 @@ def recording_ongoing(self) -> bool:
     def stop_video(self):
         if len(self.writer) > 0:
             with self._buffer_lock:
-                for camera_key, writer in self.writer.items():
-                    for i in range(self._next_ring_index):
-                        frameset = self._buffer[i]
-                        assert frameset is not None
-                        # rgb to bgr as expected by opencv
-                        writer.write(frameset.frames[camera_key].camera.color.data[:, :, ::-1])
                 for camera in self.camera_names:
                     self.writer[camera].release()
                 self.writer = {}

From 209fe4fabd49e016c269a406a6e230e93bffb44c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 3 Apr 2025 20:28:37 +0200
Subject: [PATCH 44/56] fix(desk): remove guiding mode in move home

---
 python/rcsss/control/fr3_desk.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index 89769625..cbefe3db 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -31,9 +31,8 @@ def home(ip: str, username: str, password: str, shut: bool, unlock: bool = False
     with Desk.fci(ip, username, password, unlock=unlock):
         f = rcsss.hw.FR3(ip)
         config = rcsss.hw.FR3Config()
-        config.speed_factor = 0.7
+        config.speed_factor = 0.2
         config.ik_solver = rcsss.hw.IKSolver.franka
-        config.guiding_mode_enabled = True
         f.set_parameters(config)
         config_hand = rcsss.hw.FHConfig()
         g = rcsss.hw.FrankaHand(ip, config_hand)

From a9f1943079c0a7a65804c0e4f07e22c48a7f8df5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 3 Apr 2025 20:32:24 +0200
Subject: [PATCH 45/56] improved teleop interface

---
 python/rcsss/control/vive.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 7edce7e6..216c7bf4 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -4,7 +4,7 @@
 from enum import IntFlag, auto
 from socket import AF_INET, SOCK_DGRAM, socket
 from struct import unpack
-from time import sleep
+import rcsss
 from rcsss.camera.interface import SimpleFrameRate
 from rcsss.camera.realsense import RealSenseCameraSet
 
@@ -42,8 +42,7 @@
 INCLUDE_ROTATION = True
 ROBOT_IP = "192.168.101.1"
 ROBOT_INSTANCE = RobotInstance.HARDWARE
-DEBUG = True
-STORAGE_PATH = "teleop_data"
+RECORD_FPS = 15
 # set camera dict to none disable cameras
 CAMERA_DICT = {
     # "wrist": "244222071045",
@@ -217,24 +216,23 @@ def environment_step_loop(self):
 
             with self._env_lock:
                 self._env.step(action)
-            rate_limiter(0.001)
+            rate_limiter(RECORD_FPS)
 
 
 def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet | None):
     while True:
-        i = input("> ")
+        i = input("press enter to start recording, 'q' to stop, 'h' to move to home: ")
         match i:
-            case "help":
-                print("You can use `quit` to stop the program, `episode` to start a new episode")
-            case "quit":
+            case "q":
                 sys.exit(0)
-            case "episode":
+            case "h":
+                env_rel.unwrapped.robot.move_home()
+            case _:
                 # record videos
                 if camera_set is not None:
                     video_path = env_rel.path / "videos"
                     video_path.mkdir(parents=True, exist_ok=True)
-                    camera_set.record_video(env_rel.path / "videos", f"{env_rel.key}_{env_rel.timestamp}_{env_rel.episode_count}")
-                print(f'{env_rel.episode_count = }')
+                    camera_set.record_video(video_path, f"{env_rel.key}_{env_rel.timestamp}_{env_rel.episode_count}")
 
                 thread = threading.Thread(target=action_server.environment_step_loop)
                 thread.start()
@@ -265,8 +263,7 @@ def main():
                 control_mode=ControlMode.CARTESIAN_TQuart,
                 # control_mode=ControlMode.JOINTS,
                 gripper_cfg=default_fr3_hw_gripper_cfg(async_control=True),
-                max_relative_movement=(0.01, np.deg2rad(1)),
-                # max_relative_movement=(0.5, np.deg2rad(90)),
+                max_relative_movement=(0.5, np.deg2rad(90)),
                 # max_relative_movement=np.deg2rad(20),
                 relative_to=RelativeTo.CONFIGURED_ORIGIN,
             )

From 221041dc372f7dc9b17a7223f5280c6dd0ceda9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 8 May 2025 18:36:45 +0200
Subject: [PATCH 46/56] style: pyformat

---
 python/examples/liveviewer.py    | 59 --------------------------------
 python/rcsss/camera/interface.py |  5 +--
 python/rcsss/control/fr3_desk.py |  4 +--
 python/rcsss/control/run_vla.py  | 56 ++++++++++++++++++------------
 python/rcsss/control/vive.py     | 11 +++---
 python/rcsss/envs/base.py        | 30 +++++++++-------
 python/rcsss/envs/utils.py       |  4 +--
 python/rcsss/envs/wrappers.py    | 20 +++++------
 8 files changed, 71 insertions(+), 118 deletions(-)
 delete mode 100644 python/examples/liveviewer.py

diff --git a/python/examples/liveviewer.py b/python/examples/liveviewer.py
deleted file mode 100644
index f92447d2..00000000
--- a/python/examples/liveviewer.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import argparse
-import cv2
-from matplotlib import pyplot as plt
-from rcsss.camera.interface import BaseCameraConfig
-from rcsss.camera.realsense import RealSenseCameraSet, RealSenseSetConfig
-
-
-if __name__ == "__main__":
-    # arg parse to get the key for the camera
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--camera", type=str, default="side")
-    args = parser.parse_args()
-
-    cameras = {
-        "side2": BaseCameraConfig(identifier="244222071045"), # old wrist
-        # "wrist": BaseCameraConfig(identifier="218622278131"), # new realsense
-        "bird_eye": BaseCameraConfig(identifier="243522070364"), # bird eye
-        # "side": BaseCameraConfig(identifier="243522070385"),
-        "side": BaseCameraConfig(identifier="243522070385"), # side
-    }
-
-    cam_cfg = RealSenseSetConfig(
-        cameras=cameras,
-        resolution_width=1280,
-        resolution_height=720,
-        frame_rate=15,
-        enable_imu=False,  # does not work with imu, why?
-        enable_ir=True,
-        enable_ir_emitter=False,
-    )
-
-    camera_set = RealSenseCameraSet(cam_cfg)
-    frame_set = camera_set.poll_frame_set()
-
-    # data = cv2.rotate(frame_set.frames[args.camera].camera.color.data, cv2.ROTATE_90_COUNTERCLOCKWISE)
-    data = frame_set.frames[args.camera].camera.color.data
-    img = plt.imshow(data)
-    plt.show(block=False)
-    plt.pause(0.1)
-
-    # while True:
-    for i in range(300):
-        frame_set = camera_set.poll_frame_set()
-
-        # # save the images in the frameset
-        # for key, frame in frame_set.frames.items():
-        #     cv2.imwrite(f"{key}.png", frame.camera.color.data)
-        # break
-
-        # img.set_data(frame_set.frames[args.camera].camera.color.data)
-        # # data = cv2.rotate(frame_set.frames[args.camera].camera.color.data, cv2.ROTATE_90_COUNTERCLOCKWISE)
-        # plt.draw()
-        # plt.pause(0.5)
-
-    # save the images in the frameset
-    for key, frame in frame_set.frames.items():
-        # cv2.imwrite(f"{key}.png", frame.camera.color.data)
-        # save in rgb
-        cv2.imwrite(f"{key}.png", cv2.cvtColor(frame.camera.color.data, cv2.COLOR_BGR2RGB))
diff --git a/python/rcsss/camera/interface.py b/python/rcsss/camera/interface.py
index fad9bdb9..e7ab9257 100644
--- a/python/rcsss/camera/interface.py
+++ b/python/rcsss/camera/interface.py
@@ -1,7 +1,7 @@
+import logging
 from dataclasses import dataclass
 from datetime import datetime
-import logging
-from time import time, sleep
+from time import sleep, time
 from typing import Any, Protocol
 
 from pydantic import BaseModel, Field
@@ -9,6 +9,7 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
+
 class SimpleFrameRate:
     def __init__(self):
         self.t = None
diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index cbefe3db..16fc2265 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -299,14 +299,12 @@ def take_control(self, force: bool = False) -> bool:
         active = self._get_active_token()
 
         # try to read token from cache file
-        token_path = os.path.expanduser('~/.cache/rcs_fr3_token')
+        token_path = os.path.expanduser("~/.cache/rcs_fr3_token")
         if active.id != "" and self._token.id == "" and os.path.exists(token_path):
             with open(token_path, "r") as f:
                 content = f.read()
             content = content.split("/n")
             self._token = Token(content[0], content[1], content[2])
-            
-
 
         # we already have control
         if active.id != "" and self._token.id == active.id:
diff --git a/python/rcsss/control/run_vla.py b/python/rcsss/control/run_vla.py
index 943be21a..396f5196 100644
--- a/python/rcsss/control/run_vla.py
+++ b/python/rcsss/control/run_vla.py
@@ -1,13 +1,17 @@
-from dataclasses import dataclass
 import logging
+from dataclasses import dataclass
 from time import sleep
-from PIL import Image
+
 import cv2
 import gymnasium as gym
-import numpy as np
+import json_numpy
 import matplotlib.pyplot as plt
-from rcsss.envs.wrappers import RHCWrapper, StorageWrapper
+import numpy as np
+import requests
+from agents.client import RemoteAgent
+from agents.policies import Act, Obs
 from omegaconf import OmegaConf
+from PIL import Image
 from rcsss._core.sim import FR3, FR3State
 from rcsss.camera.realsense import RealSenseCameraSet
 from rcsss.config import read_config_yaml
@@ -16,13 +20,17 @@
 from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
 from rcsss.control.utils import load_creds_fr3_desk
 from rcsss.envs.base import ControlMode, RelativeTo, RobotInstance
-from rcsss.envs.factories import default_fr3_hw_gripper_cfg, default_fr3_hw_robot_cfg, default_fr3_sim_gripper_cfg, default_fr3_sim_robot_cfg, default_mujoco_cameraset_cfg, default_realsense, fr3_hw_env, fr3_sim_env
-import requests
-import json_numpy
-
-from agents.policies import Act, Obs
-from agents.client import RemoteAgent
-from rcsss.envs.base import ControlMode
+from rcsss.envs.factories import (
+    default_fr3_hw_gripper_cfg,
+    default_fr3_hw_robot_cfg,
+    default_fr3_sim_gripper_cfg,
+    default_fr3_sim_robot_cfg,
+    default_mujoco_cameraset_cfg,
+    default_realsense,
+    fr3_hw_env,
+    fr3_sim_env,
+)
+from rcsss.envs.wrappers import RHCWrapper, StorageWrapper
 
 json_numpy.patch()
 
@@ -37,7 +45,13 @@
 INSTRUCTION = "pick up the can"
 
 default_cfg = OmegaConf.create(
-    {"host": "dep-eng-air-p-1.hosts.utn.de", "port": 9000, "model": "openvla", "robot_ip": "192.168.101.1", "debug": True}
+    {
+        "host": "dep-eng-air-p-1.hosts.utn.de",
+        "port": 9000,
+        "model": "openvla",
+        "robot_ip": "192.168.101.1",
+        "debug": True,
+    }
     # {"host": "dep-eng-air-p-1.hosts.utn.de", "port": 7000, "model": "octo", "robot_ip": "192.168.101.1", "debug": False}
     # {"host": "localhost", "port": 8080, "model": "chatgpt", "robot_ip": "192.168.101.1", "debug": False}
 )
@@ -65,11 +79,7 @@ def get_obs(self, obs: dict) -> Obs:
         # side = obs["frames"]["default_free"]["rgb"]
         # side = obs["frames"]["openvla_view"]["rgb"]
 
-        side = np.array(Image.fromarray(side).resize(
-                            (256, 256), Image.Resampling.LANCZOS))
-
-
-        
+        side = np.array(Image.fromarray(side).resize((256, 256), Image.Resampling.LANCZOS))
 
         # center crop square
         # h = side.shape[0]
@@ -91,7 +101,7 @@ def get_obs(self, obs: dict) -> Obs:
 
         # return {"bird_eye": bird_eye, "wrist": wrist, "side": side, "gripper": obs["gripper"]}
         # return Obs(rgb_bird_eye=bird_eye, rgb_wrist=wrist, rgb_side=side, gripper=obs["gripper"], rgb=obs["frames"])
-        return Obs(rgb_side=side, gripper=obs["gripper"]) #, info={"rgb": obs["frames"], "xyzrpy": obs["xyzrpy"]})
+        return Obs(rgb_side=side, gripper=obs["gripper"])  # , info={"rgb": obs["frames"], "xyzrpy": obs["xyzrpy"]})
 
     def step(self, action) -> tuple[bool, list[str], dict]:
         # TODO Check if the model indicates when an action is finished.
@@ -142,7 +152,9 @@ def loop(self):
 def main():
     if ROBOT_INSTANCE == RobotInstance.HARDWARE:
         user, pw = load_creds_fr3_desk()
-        resource_manger = FCI(Desk(cfg.robot_ip, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True)
+        resource_manger = FCI(
+            Desk(cfg.robot_ip, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True
+        )
     else:
         resource_manger = DummyResourceManager()
     with resource_manger:
@@ -168,13 +180,13 @@ def main():
             # )
             env = fr3_hw_env(
                 ip=cfg.robot_ip,
-                camera_set = camera_set,
+                camera_set=camera_set,
                 robot_cfg=default_fr3_hw_robot_cfg(),
                 control_mode=ControlMode.CARTESIAN_TRPY,
                 collision_guard=None,
                 gripper_cfg=default_fr3_hw_gripper_cfg(),
                 max_relative_movement=(0.1, np.deg2rad(5)),
-            relative_to=RelativeTo.LAST_STEP,
+                relative_to=RelativeTo.LAST_STEP,
             )
         else:
             env = fr3_sim_env(
@@ -206,4 +218,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 9c4b26c5..19847c96 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -4,12 +4,12 @@
 from enum import IntFlag, auto
 from socket import AF_INET, SOCK_DGRAM, socket
 from struct import unpack
-import rcsss
-from rcsss.camera.interface import SimpleFrameRate
-from rcsss.camera.realsense import RealSenseCameraSet
 
 import numpy as np
+import rcsss
 from rcsss._core.common import RPY, Pose
+from rcsss.camera.interface import SimpleFrameRate
+from rcsss.camera.realsense import RealSenseCameraSet
 from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
 from rcsss.control.utils import load_creds_fr3_desk
 from rcsss.envs.base import (
@@ -30,7 +30,6 @@
 )
 from rcsss.envs.wrappers import StorageWrapperHDF5, StorageWrapperNumpy
 
-
 logger = logging.getLogger(__name__)
 
 EGO_LOCK = False
@@ -51,7 +50,6 @@
 # CAMERA_DICT = None
 
 
-
 class Button(IntFlag):
     L_TRIGGER = auto()
     L_SQUEEZE = auto()
@@ -174,7 +172,6 @@ def run(self):
                     with self._resource_lock:
                         self._last_controller_pose = last_controller_pose
 
-
                 if Button(int(unpacked[7])) & self._grp_btn and not Button(int(self._buttons)) & self._grp_btn:
                     # just pressed
                     self._grp_pos = 0
@@ -255,7 +252,7 @@ def main():
             camera_set = default_realsense(CAMERA_DICT)
             env_rel = fr3_hw_env(
                 ip=ROBOT_IP,
-                camera_set = camera_set,
+                camera_set=camera_set,
                 # collision_guard="lab",
                 robot_cfg=default_fr3_hw_robot_cfg(async_control=True),
                 control_mode=ControlMode.CARTESIAN_TQuart,
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 59d058b2..ab3bfc79 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -400,12 +400,15 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                 )
                 self._last_action = clipped_pose_offset
             else:
-                pose_diff = common.Pose(
-                    translation=action[self.trpy_key][:3],
-                    rpy_vector=action[self.trpy_key][3:],
-                ) * self._last_action.inverse()
-                clipped_pose_diff = (
-                    pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
+                pose_diff = (
+                    common.Pose(
+                        translation=action[self.trpy_key][:3],
+                        rpy_vector=action[self.trpy_key][3:],
+                    )
+                    * self._last_action.inverse()
+                )
+                clipped_pose_diff = pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(
+                    self.max_mov[1]
                 )
                 clipped_pose_offset = clipped_pose_diff * self._last_action
                 self._last_action = clipped_pose_offset
@@ -440,12 +443,15 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                 )
                 self._last_action = clipped_pose_offset
             else:
-                pose_diff = common.Pose(
-                    translation=action[self.tquart_key][:3],
-                    quaternion=action[self.tquart_key][3:],
-                ) * self._last_action.inverse()
-                clipped_pose_diff = (
-                    pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
+                pose_diff = (
+                    common.Pose(
+                        translation=action[self.tquart_key][:3],
+                        quaternion=action[self.tquart_key][3:],
+                    )
+                    * self._last_action.inverse()
+                )
+                clipped_pose_diff = pose_diff.limit_translation_length(self.max_mov[0]).limit_rotation_angle(
+                    self.max_mov[1]
                 )
                 clipped_pose_offset = clipped_pose_diff * self._last_action
                 self._last_action = clipped_pose_offset
diff --git a/python/rcsss/envs/utils.py b/python/rcsss/envs/utils.py
index 9b9590e2..25f7b134 100644
--- a/python/rcsss/envs/utils.py
+++ b/python/rcsss/envs/utils.py
@@ -6,7 +6,7 @@
 import numpy as np
 import rcsss
 from rcsss import sim
-from rcsss._core.hw import FR3Config, IKController
+from rcsss._core.hw import FR3Config, IKSolver
 from rcsss._core.sim import CameraType
 from rcsss.camera.interface import BaseCameraConfig
 from rcsss.camera.realsense import RealSenseCameraSet, RealSenseSetConfig
@@ -27,7 +27,7 @@ def default_fr3_hw_robot_cfg():
     robot_cfg = FR3Config()
     robot_cfg.tcp_offset = rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset())
     robot_cfg.speed_factor = 0.1
-    robot_cfg.controller = IKController.robotics_library
+    robot_cfg.ik_solver = IKSolver.rcs
     return robot_cfg
 
 
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 7961fad2..3ae0968e 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -1,20 +1,15 @@
-from datetime import datetime
 import os
+import subprocess
+from datetime import datetime
 from pathlib import Path
 from typing import Any, SupportsFloat
 
 import gymnasium as gym
-
-import os
-
-import gymnasium as gym
+import h5py
 import numpy as np
 from PIL import Image
 from rcsss.camera.hw import BaseHardwareCameraSet
 
-import subprocess
-import h5py
-
 
 class StorageWrapperNumpy(gym.Wrapper):
     # TODO: this should also record the instruction
@@ -57,7 +52,6 @@ def __init__(
         # get git diff
         os.system(f'git diff --submodule=diff > {os.path.join(str(self.path), "git_diff.txt")}')
 
-
     def flush(self):
         """writes data to disk"""
         if len(self.data) == 0 or self.step_count == 0:
@@ -192,7 +186,6 @@ def __init__(
         else:
             self.instruction_group = self.h5file.create_group(self.key)
 
-
     def append_to_hdf5(self, group, data_dict, index):
         for key, value in data_dict.items():
             if isinstance(value, dict):
@@ -233,7 +226,12 @@ def append_to_hdf5(self, group, data_dict, index):
                     initial_shape = (index + 1,) + shape
                     maxshape = (None,) + shape
                     dataset = group.create_dataset(
-                        dataset_name, shape=initial_shape, maxshape=maxshape, chunks=True, dtype=dtype , compression="gzip"
+                        dataset_name,
+                        shape=initial_shape,
+                        maxshape=maxshape,
+                        chunks=True,
+                        dtype=dtype,
+                        compression="gzip",
                     )
                     self.datasets[full_dataset_path] = dataset
                 else:

From 602afe52cc73134c916b2643ec26a227325e674b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 8 May 2025 19:06:12 +0200
Subject: [PATCH 47/56] chore: linter fixes

---
 python/rcsss/control/run_vla.py | 24 +++++++--------------
 python/rcsss/control/vive.py    | 17 ++++++++-------
 python/rcsss/envs/utils.py      |  6 ++++--
 python/rcsss/envs/wrappers.py   | 37 ++++++++++++++++-----------------
 4 files changed, 38 insertions(+), 46 deletions(-)

diff --git a/python/rcsss/control/run_vla.py b/python/rcsss/control/run_vla.py
index 396f5196..37207387 100644
--- a/python/rcsss/control/run_vla.py
+++ b/python/rcsss/control/run_vla.py
@@ -1,36 +1,26 @@
 import logging
-from dataclasses import dataclass
-from time import sleep
 
-import cv2
 import gymnasium as gym
 import json_numpy
-import matplotlib.pyplot as plt
 import numpy as np
-import requests
 from agents.client import RemoteAgent
-from agents.policies import Act, Obs
+from agents.policies import Obs
 from omegaconf import OmegaConf
 from PIL import Image
-from rcsss._core.sim import FR3, FR3State
-from rcsss.camera.realsense import RealSenseCameraSet
-from rcsss.config import read_config_yaml
 
-# from rcsss.desk import FCI, Desk
+from python.rcsss.envs.factories import fr3_hw_env, fr3_sim_env
 from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
 from rcsss.control.utils import load_creds_fr3_desk
 from rcsss.envs.base import ControlMode, RelativeTo, RobotInstance
-from rcsss.envs.factories import (
+from rcsss.envs.utils import (
     default_fr3_hw_gripper_cfg,
     default_fr3_hw_robot_cfg,
     default_fr3_sim_gripper_cfg,
     default_fr3_sim_robot_cfg,
     default_mujoco_cameraset_cfg,
     default_realsense,
-    fr3_hw_env,
-    fr3_sim_env,
 )
-from rcsss.envs.wrappers import RHCWrapper, StorageWrapper
+from rcsss.envs.wrappers import RHCWrapper, StorageWrapperNumpy
 
 json_numpy.patch()
 
@@ -156,7 +146,7 @@ def main():
             Desk(cfg.robot_ip, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True
         )
     else:
-        resource_manger = DummyResourceManager()
+        resource_manger = DummyResourceManager() # type: ignore
     with resource_manger:
 
         if ROBOT_INSTANCE == RobotInstance.HARDWARE:
@@ -201,12 +191,12 @@ def main():
             )
             env.get_wrapper_attr("sim").open_gui()
         if not DEBUG:
-            env = StorageWrapper(env, path="octo_realv1_async", instruction=INSTRUCTION)
+            env = StorageWrapperNumpy(env, path="octo_realv1_async", instruction=INSTRUCTION)
 
             # record videos
             video_path = env.path / "videos"
             video_path.mkdir(parents=True, exist_ok=True)
-            camera_set.record_video(video_path, 0)
+            camera_set.record_video(video_path, "0")
 
         env = RHCWrapper(env, exec_horizon=1)
         controller = RobotControl(env, INSTRUCTION, cfg.host, cfg.port, cfg.model)
diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
index 19847c96..5c2c76af 100644
--- a/python/rcsss/control/vive.py
+++ b/python/rcsss/control/vive.py
@@ -6,7 +6,6 @@
 from struct import unpack
 
 import numpy as np
-import rcsss
 from rcsss._core.common import RPY, Pose
 from rcsss.camera.interface import SimpleFrameRate
 from rcsss.camera.realsense import RealSenseCameraSet
@@ -28,7 +27,7 @@
     default_fr3_sim_robot_cfg,
     default_realsense,
 )
-from rcsss.envs.wrappers import StorageWrapperHDF5, StorageWrapperNumpy
+from rcsss.envs.wrappers import StorageWrapperHDF5
 
 logger = logging.getLogger(__name__)
 
@@ -48,6 +47,9 @@
     "side": "243522070385",
 }
 # CAMERA_DICT = None
+DEBUG = True
+STORAGE_PATH = "data"
+
 
 
 class Button(IntFlag):
@@ -290,12 +292,11 @@ def main():
 
         env_rel.reset()
 
-        with env_rel:
-            with UDPViveActionServer(VIVE_HOST, VIVE_PORT, env_rel) as action_server:
-                if not DEBUG:
-                    input_loop(env_rel, action_server, camera_set)
-                else:
-                    action_server.environment_step_loop()
+        with env_rel, UDPViveActionServer(VIVE_HOST, VIVE_PORT, env_rel) as action_server:
+            if not DEBUG:
+                input_loop(env_rel, action_server, camera_set)
+            else:
+                action_server.environment_step_loop()
 
 
 if __name__ == "__main__":
diff --git a/python/rcsss/envs/utils.py b/python/rcsss/envs/utils.py
index 25f7b134..b4b6537f 100644
--- a/python/rcsss/envs/utils.py
+++ b/python/rcsss/envs/utils.py
@@ -23,19 +23,21 @@ def default_fr3_sim_robot_cfg(mjcf: str | Path = "fr3_empty_world") -> sim.FR3Co
     return cfg
 
 
-def default_fr3_hw_robot_cfg():
+def default_fr3_hw_robot_cfg(async_control: bool = False):
     robot_cfg = FR3Config()
     robot_cfg.tcp_offset = rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset())
     robot_cfg.speed_factor = 0.1
     robot_cfg.ik_solver = IKSolver.rcs
+    robot_cfg.async_control = async_control
     return robot_cfg
 
 
-def default_fr3_hw_gripper_cfg():
+def default_fr3_hw_gripper_cfg(async_control: bool = False):
     gripper_cfg = rcsss.hw.FHConfig()
     gripper_cfg.epsilon_inner = gripper_cfg.epsilon_outer = 0.1
     gripper_cfg.speed = 0.1
     gripper_cfg.force = 30
+    gripper_cfg.async_control = async_control
     return gripper_cfg
 
 
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
index 3ae0968e..4824419c 100644
--- a/python/rcsss/envs/wrappers.py
+++ b/python/rcsss/envs/wrappers.py
@@ -95,9 +95,10 @@ def create_gif(path: str, episode: int, key: str, frame_interval_s: float = 0.5)
             )
         else:
             if "observation" in data and "frames" in data["observation"] and key in data["observation"]["frames"]:
-                raise ValueError(f"Key {key} not found in {data['observation']['frames'].keys()}")
+                msg = f"Key {key} not found in {data['observation']['frames'].keys()}"
             else:
-                raise ValueError(f"Key {key} not found in data")
+                msg = f"Key {key} not found in data"
+            raise ValueError(msg)
 
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
@@ -190,10 +191,7 @@ def append_to_hdf5(self, group, data_dict, index):
         for key, value in data_dict.items():
             if isinstance(value, dict):
                 # Handle subgroup
-                if key not in group:
-                    subgroup = group.create_group(key)
-                else:
-                    subgroup = group[key]
+                subgroup = group.create_group(key) if key not in group else group[key]
                 self.append_to_hdf5(subgroup, value, index)
             else:
                 # Handle dataset
@@ -212,19 +210,21 @@ def append_to_hdf5(self, group, data_dict, index):
                         shape = ()
                     elif isinstance(value, np.ndarray):
                         # Numpy array
+                        v = value
                         dtype = value.dtype
                         shape = value.shape
                     else:
                         # Other types, try to convert to numpy array
                         try:
-                            value = np.array(value)
-                            dtype = value.dtype
-                            shape = value.shape
+                            v = np.array(value)
+                            dtype = v.dtype
+                            shape = v.shape
                         except Exception as e:
-                            raise ValueError(f"Unsupported data type for key '{key}': {type(value)}") from e
+                            msg = f"Unsupported data type for key '{key}': {type(value)}"
+                            raise ValueError(msg) from e
                     # Create dataset
-                    initial_shape = (index + 1,) + shape
-                    maxshape = (None,) + shape
+                    initial_shape = (index + 1, *shape)
+                    maxshape = (None, *shape)
                     dataset = group.create_dataset(
                         dataset_name,
                         shape=initial_shape,
@@ -240,12 +240,10 @@ def append_to_hdf5(self, group, data_dict, index):
                         new_size = index + 1
                         dataset.resize(new_size, axis=0)
                 # Store value
-                if isinstance(value, str):
-                    dataset[index] = value
-                elif np.isscalar(value):
+                if isinstance(value, str) or np.isscalar(value):
                     dataset[index] = value
                 else:
-                    dataset[index, ...] = value
+                    dataset[index, ...] = v
 
     def flush(self):
         """Writes data to disk."""
@@ -267,7 +265,7 @@ def create_gif(
         episode_name: str,
         camera_id: str,
         frame_interval_s: float = 0.5,
-        output_folder: str = None,
+        output_folder: str | None = None,
     ):
 
         with h5py.File(h5file_path, "r") as data:
@@ -302,7 +300,8 @@ def create_gif(
                     loop=0,
                 )
             else:
-                raise ValueError(f"camera id {camera_id} not found in data")
+                msg = f"camera id {camera_id} not found in data"
+                raise ValueError(msg)
 
     def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
         obs, reward, terminated, truncated, info = super().step(action)
@@ -327,7 +326,7 @@ def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = Non
             git_diff = subprocess.check_output(["git", "diff", "--submodule=diff"]).decode("utf-8")
             git_commit_id = subprocess.check_output(["git", "log", "--format=%H", "-n", "1"]).decode("utf-8").strip()
             git_submodule_status = subprocess.check_output(["git", "submodule", "status"]).decode("utf-8")
-        except Exception as e:
+        except subprocess.CalledProcessError:
             git_diff = ""
             git_commit_id = ""
             git_submodule_status = ""

From d22e5e555c7ff0cec6591e34298698e880e75d23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 8 May 2025 19:28:34 +0200
Subject: [PATCH 48/56] fix(env): moved close of control thread to hw env

- removed vla script
- fixed close control thread in hardware
---
 python/rcsss/control/run_vla.py | 211 --------------------------------
 python/rcsss/envs/base.py       |   5 +-
 python/rcsss/envs/hw.py         |   4 +
 3 files changed, 6 insertions(+), 214 deletions(-)
 delete mode 100644 python/rcsss/control/run_vla.py

diff --git a/python/rcsss/control/run_vla.py b/python/rcsss/control/run_vla.py
deleted file mode 100644
index 37207387..00000000
--- a/python/rcsss/control/run_vla.py
+++ /dev/null
@@ -1,211 +0,0 @@
-import logging
-
-import gymnasium as gym
-import json_numpy
-import numpy as np
-from agents.client import RemoteAgent
-from agents.policies import Obs
-from omegaconf import OmegaConf
-from PIL import Image
-
-from python.rcsss.envs.factories import fr3_hw_env, fr3_sim_env
-from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
-from rcsss.control.utils import load_creds_fr3_desk
-from rcsss.envs.base import ControlMode, RelativeTo, RobotInstance
-from rcsss.envs.utils import (
-    default_fr3_hw_gripper_cfg,
-    default_fr3_hw_robot_cfg,
-    default_fr3_sim_gripper_cfg,
-    default_fr3_sim_robot_cfg,
-    default_mujoco_cameraset_cfg,
-    default_realsense,
-)
-from rcsss.envs.wrappers import RHCWrapper, StorageWrapperNumpy
-
-json_numpy.patch()
-
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-
-
-ROBOT_INSTANCE = RobotInstance.HARDWARE
-# CAMERA_GUI = "rgb_side"
-# INSTRUCTION = "pick up the blue cube"
-INSTRUCTION = "pick up the can"
-
-default_cfg = OmegaConf.create(
-    {
-        "host": "dep-eng-air-p-1.hosts.utn.de",
-        "port": 9000,
-        "model": "openvla",
-        "robot_ip": "192.168.101.1",
-        "debug": True,
-    }
-    # {"host": "dep-eng-air-p-1.hosts.utn.de", "port": 7000, "model": "octo", "robot_ip": "192.168.101.1", "debug": False}
-    # {"host": "localhost", "port": 8080, "model": "chatgpt", "robot_ip": "192.168.101.1", "debug": False}
-)
-cli_conf = OmegaConf.from_cli()
-cfg = OmegaConf.merge(cli_conf, default_cfg)
-DEBUG = cfg.debug
-
-
-class RobotControl:
-    def __init__(self, env: gym.Env, instruction: str, model_host: str, model_port: int, model_name: str):
-        self.env = env
-        self.gripper_state = 1
-
-        self.instruction = instruction
-        self.remote_agent = RemoteAgent(model_host, model_port, model_name)
-
-        if not DEBUG:
-            self.env.get_wrapper_attr("log_files")(self.remote_agent.git_status())
-
-    def get_obs(self, obs: dict) -> Obs:
-        # bird_eye=obs["frames"]["bird_eye"]["rgb"]
-        # wrist=obs["frames"]["wrist"]["rgb"]
-
-        side = obs["frames"]["side"]["rgb"]
-        # side = obs["frames"]["default_free"]["rgb"]
-        # side = obs["frames"]["openvla_view"]["rgb"]
-
-        side = np.array(Image.fromarray(side).resize((256, 256), Image.Resampling.LANCZOS))
-
-        # center crop square
-        # h = side.shape[0]
-        # start_w = (side.shape[1] - h) // 2
-        # end_w = start_w + h
-        # side = side[:, start_w:end_w]
-        # bird_eye = bird_eye[:, start_w:end_w]
-        # wrist = wrist[:, start_w:end_w]
-
-        # rotate side by 90 degrees
-        # side = cv2.rotate(side, cv2.ROTATE_90_COUNTERCLOCKWISE)
-
-        # rescale side to 256x256
-        # side = cv2.resize(side, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
-        # bird_eye = cv2.resize(bird_eye, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
-        # wrist = cv2.resize(wrist, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
-
-        # TODO: add some noise, use other cameras, record what is happening
-
-        # return {"bird_eye": bird_eye, "wrist": wrist, "side": side, "gripper": obs["gripper"]}
-        # return Obs(rgb_bird_eye=bird_eye, rgb_wrist=wrist, rgb_side=side, gripper=obs["gripper"], rgb=obs["frames"])
-        return Obs(rgb_side=side, gripper=obs["gripper"])  # , info={"rgb": obs["frames"], "xyzrpy": obs["xyzrpy"]})
-
-    def step(self, action) -> tuple[bool, list[str], dict]:
-        # TODO Check if the model indicates when an action is finished.
-        logger.info("Executing command")
-        # only translation
-        # action[3:6] = [0, 0, 0]
-        # obs, _, _, truncated, info = self.env.step({"xyzrpy": action[:6], "gripper": action[6]})
-        print("action", action)
-        obs, _, _, truncated, info = self.env.step(action)
-        del info["observations"]
-        print("info", info)
-        # TODO: ik success from info
-        return False, obs, info
-
-    def loop(self):
-        # Initialize the environment and obtain the initial observation
-        obs, _ = self.env.reset()
-        info = {}
-
-        logger.info("Initializing")
-        obs_dict = self.get_obs(obs)
-
-        # img = plt.imshow(obs_dict[CAMERA_GUI])
-        # plt.show(block=False)
-        # plt.pause(0.1)
-        self.remote_agent.reset(obs_dict, instruction=self.instruction)
-
-        logger.info("Starting control loop")
-        while True:
-            logger.info("Getting action")
-            obs_dict.info = info
-            action = self.remote_agent.act(obs_dict)
-            if action.done:
-                logger.info("done issues by agent, shutting down")
-                break
-            logger.info(f"action issued by agent: {action.action}")
-
-            done, obs, info = self.step(action.action)
-            obs_dict = self.get_obs(obs)
-            # img.set_data(obs_dict.rgb_side)
-            # plt.draw()
-            # plt.pause(0.5)
-
-            if done:
-                break
-
-
-def main():
-    if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-        user, pw = load_creds_fr3_desk()
-        resource_manger = FCI(
-            Desk(cfg.robot_ip, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True
-        )
-    else:
-        resource_manger = DummyResourceManager() # type: ignore
-    with resource_manger:
-
-        if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-            camera_dict = {
-                # "wrist": "244222071045",
-                # "bird_eye": "243522070364",
-                # "side": "243522070385",
-                "side": "244222071045",
-            }
-
-            camera_set = default_realsense(camera_dict)
-            # env = utils.fr3_hw_env(
-            #     ip=cfg.robot_ip,
-            #     camera_set = camera_set,
-            #     control_mode=ControlMode.CARTESIAN_TRPY,
-            #     collision_guard=None,
-            #     gripper=True,
-            #     max_relative_movement=(0.1, np.deg2rad(5)),
-            #     asynchronous=True,
-            # relative_to=RelativeTo.LAST_STEP,
-            # )
-            env = fr3_hw_env(
-                ip=cfg.robot_ip,
-                camera_set=camera_set,
-                robot_cfg=default_fr3_hw_robot_cfg(),
-                control_mode=ControlMode.CARTESIAN_TRPY,
-                collision_guard=None,
-                gripper_cfg=default_fr3_hw_gripper_cfg(),
-                max_relative_movement=(0.1, np.deg2rad(5)),
-                relative_to=RelativeTo.LAST_STEP,
-            )
-        else:
-            env = fr3_sim_env(
-                control_mode=ControlMode.CARTESIAN_TRPY,
-                robot_cfg=default_fr3_sim_robot_cfg(),
-                gripper_cfg=default_fr3_sim_gripper_cfg(),
-                camera_set_cfg=default_mujoco_cameraset_cfg(),
-                max_relative_movement=(0.5, np.deg2rad(0)),
-                # mjcf="/home/juelg/code/frankcsy/robot-control-stack/build/_deps/scenes-src/scenes/fr3_empty_world/scene.xml",
-                mjcf="/home/juelg/code/frankcsy/models/scenes/lab/scene.xml",
-                relative_to=RelativeTo.LAST_STEP,
-            )
-            env.get_wrapper_attr("sim").open_gui()
-        if not DEBUG:
-            env = StorageWrapperNumpy(env, path="octo_realv1_async", instruction=INSTRUCTION)
-
-            # record videos
-            video_path = env.path / "videos"
-            video_path.mkdir(parents=True, exist_ok=True)
-            camera_set.record_video(video_path, "0")
-
-        env = RHCWrapper(env, exec_horizon=1)
-        controller = RobotControl(env, INSTRUCTION, cfg.host, cfg.port, cfg.model)
-        # controller = RobotControl(env, "Pick up the Yellow Brick from the Top", cfg.host, cfg.port, cfg.model)
-        # controller = RobotControl(env, "Move Yellow Box to the Right", cfg.host, cfg.port, cfg.model)
-        input("press enter to start")
-        with env:
-            controller.loop()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index ab3bfc79..7ad8da9c 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -7,8 +7,8 @@
 
 import gymnasium as gym
 import numpy as np
+from python.rcsss._core.common import Robot
 from rcsss import common, sim
-from rcsss._core import hw
 from rcsss.camera.interface import BaseCameraSet
 from rcsss.envs.space_utils import (
     ActObsInfoWrapper,
@@ -160,7 +160,7 @@ class FR3Env(gym.Env):
     y
     """
 
-    def __init__(self, robot: hw.FR3, control_mode: ControlMode):
+    def __init__(self, robot: Robot, control_mode: ControlMode):
         self.robot = robot
         self._control_mode_overrides = [control_mode]
         self.action_space: gym.spaces.Dict
@@ -255,7 +255,6 @@ def reset(
         return self.get_obs(), {}
 
     def close(self):
-        self.robot.stop_control_thread()
         super().close()
 
 
diff --git a/python/rcsss/envs/hw.py b/python/rcsss/envs/hw.py
index 5ce0f755..d55f00db 100644
--- a/python/rcsss/envs/hw.py
+++ b/python/rcsss/envs/hw.py
@@ -29,3 +29,7 @@ def reset(
         self, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         return super().reset(seed=seed, options=options)
+
+    def close(self):
+        self.hw_robot.stop_control_thread()
+        super().close()

From eb93d18bb6f2b5f178703fb6e6e26be2c97ef2b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 12:35:57 +0200
Subject: [PATCH 49/56] refactor(control): remove vive and recorder app code

---
 python/rcsss/control/vive.py  | 303 --------------------------
 python/rcsss/envs/wrappers.py | 395 ----------------------------------
 2 files changed, 698 deletions(-)
 delete mode 100644 python/rcsss/control/vive.py
 delete mode 100644 python/rcsss/envs/wrappers.py

diff --git a/python/rcsss/control/vive.py b/python/rcsss/control/vive.py
deleted file mode 100644
index 5c2c76af..00000000
--- a/python/rcsss/control/vive.py
+++ /dev/null
@@ -1,303 +0,0 @@
-import logging
-import sys
-import threading
-from enum import IntFlag, auto
-from socket import AF_INET, SOCK_DGRAM, socket
-from struct import unpack
-
-import numpy as np
-from rcsss._core.common import RPY, Pose
-from rcsss.camera.interface import SimpleFrameRate
-from rcsss.camera.realsense import RealSenseCameraSet
-from rcsss.control.fr3_desk import FCI, Desk, DummyResourceManager
-from rcsss.control.utils import load_creds_fr3_desk
-from rcsss.envs.base import (
-    ControlMode,
-    GripperDictType,
-    LimitedTQuartRelDictType,
-    RelativeActionSpace,
-    RelativeTo,
-    RobotInstance,
-)
-from rcsss.envs.factories import fr3_hw_env, fr3_sim_env
-from rcsss.envs.utils import (
-    default_fr3_hw_gripper_cfg,
-    default_fr3_hw_robot_cfg,
-    default_fr3_sim_gripper_cfg,
-    default_fr3_sim_robot_cfg,
-    default_realsense,
-)
-from rcsss.envs.wrappers import StorageWrapperHDF5
-
-logger = logging.getLogger(__name__)
-
-EGO_LOCK = False
-VIVE_HOST = "192.168.99.1"
-VIVE_PORT = 54321
-
-INCLUDE_ROTATION = True
-ROBOT_IP = "192.168.101.1"
-ROBOT_INSTANCE = RobotInstance.HARDWARE
-RECORD_FPS = 15
-# set camera dict to none disable cameras
-CAMERA_DICT = {
-    # "wrist": "244222071045",
-    # "wrist": "218622278131", # new realsense
-    "bird_eye": "243522070364",
-    "side": "243522070385",
-}
-# CAMERA_DICT = None
-DEBUG = True
-STORAGE_PATH = "data"
-
-
-
-class Button(IntFlag):
-    L_TRIGGER = auto()
-    L_SQUEEZE = auto()
-    LT_CLICK = auto()
-    R_TRIGGER = auto()
-    R_SQUEEZE = auto()
-    RT_CLICK = auto()
-
-
-class UDPViveActionServer(threading.Thread):
-    # seven doubles and one integer in network byte order
-    FMT = "!" + 7 * "d" + "i" + 6 * "d"
-
-    # base transform from OpenXR coordinate system
-    # transform_from_openxr = Pose(RPY(roll=0.5 * np.pi, yaw=0)) # for second robot
-    transform_from_openxr = Pose(RPY(roll=0.5 * np.pi, yaw=np.pi))
-
-    def __init__(
-        self, host: str, port: int, env: RelativeActionSpace, trg_btn=Button.R_TRIGGER, grp_btn=Button.R_SQUEEZE
-    ):
-        super().__init__()
-        self._host: str = host
-        self._port: int = port
-
-        self._resource_lock = threading.Lock()
-        self._env_lock = threading.Lock()
-        self._env = env
-        self._trg_btn = trg_btn
-        self._grp_btn = grp_btn
-        self._grp_pos = 1
-        self._buttons = 0
-        self._exit_requested = False
-        self._last_controller_pose = Pose()
-        self._offset_pose = Pose()
-        self._ego_lock = EGO_LOCK
-        self._ego_transform = Pose()
-        self._env.set_origin_to_current()
-        self._step_env = False
-
-    def next_action(self) -> Pose:
-        with self._resource_lock:
-            transform = Pose(
-                translation=self._last_controller_pose.translation() - self._offset_pose.translation(),
-                quaternion=(self._last_controller_pose * self._offset_pose.inverse()).rotation_q(),
-            )
-            return (
-                self._ego_transform
-                * UDPViveActionServer.transform_from_openxr
-                * transform
-                * UDPViveActionServer.transform_from_openxr.inverse()
-                * self._ego_transform.inverse()
-            )
-
-    def get_last_controller_pose(self) -> Pose:
-        return self._last_controller_pose
-
-    def run(self):
-        warning_raised = False
-
-        with socket(AF_INET, SOCK_DGRAM) as sock:
-            sock.settimeout(2)
-            sock.bind((self._host, self._port))
-            while not self._exit_requested:
-                try:
-                    unpacked = unpack(UDPViveActionServer.FMT, sock.recv(13 * 8 + 4))
-                    if warning_raised:
-                        logger.info("[UDP Server] connection reestablished")
-                        warning_raised = False
-                except TimeoutError:
-                    if not warning_raised:
-                        logger.warning("[UDP server] socket timeout (0.1s), waiting for packets")
-                        warning_raised = True
-                    continue
-                last_controller_pose_raw = np.ctypeslib.as_array(unpacked[:7])
-                last_controller_pose = Pose(
-                    translation=last_controller_pose_raw[4:],
-                    quaternion=last_controller_pose_raw[:4] if INCLUDE_ROTATION else np.array([0, 0, 0, 1]),
-                )
-                if Button(int(unpacked[7])) & self._trg_btn and not Button(int(self._buttons)) & self._trg_btn:
-                    # trigger just pressed (first data sample with button pressed
-
-                    # set forward direction based on current controller pose
-                    if self._ego_lock:
-                        x_axis = Pose(translation=np.array([1, 0, 0]))
-                        x_axis_rot = (
-                            UDPViveActionServer.transform_from_openxr
-                            * Pose(quaternion=last_controller_pose.rotation_q())
-                            * UDPViveActionServer.transform_from_openxr.inverse()
-                            * x_axis
-                        )
-
-                        # Compute angle around z axis: https://stackoverflow.com/questions/21483999/using-atan2-to-find-angle-between-two-vectors
-                        rot_z = np.atan2(x_axis_rot.translation()[1], x_axis_rot.translation()[0]) - np.atan2(
-                            x_axis.translation()[1], x_axis.translation()[0]
-                        )
-                        rot_z -= np.pi / 2
-
-                        print(f"Angle: {rot_z*180/np.pi}")
-                        with self._resource_lock:
-                            self._ego_transform = Pose(RPY(yaw=-rot_z))
-                    else:
-                        with self._resource_lock:
-                            self._ego_transform = Pose()
-
-                    with self._resource_lock:
-                        self._offset_pose = last_controller_pose
-                        self._last_controller_pose = last_controller_pose
-
-                elif not Button(int(unpacked[7])) & self._trg_btn and Button(int(self._buttons)) & self._trg_btn:
-                    # released
-                    with self._resource_lock:
-                        self._last_controller_pose = Pose()
-                        self._offset_pose = Pose()
-                        self._ego_transform = Pose()
-                    with self._env_lock:
-                        self._env.set_origin_to_current()
-
-                elif Button(int(unpacked[7])) & self._trg_btn:
-                    # button is pressed
-                    with self._resource_lock:
-                        self._last_controller_pose = last_controller_pose
-
-                if Button(int(unpacked[7])) & self._grp_btn and not Button(int(self._buttons)) & self._grp_btn:
-                    # just pressed
-                    self._grp_pos = 0
-                elif not Button(int(unpacked[7])) & self._grp_btn and Button(int(self._buttons)) & self._grp_btn:
-                    # just released
-                    self._grp_pos = 1
-
-                self._buttons = unpacked[7]
-
-    def stop(self):
-        self._exit_requested = True
-        self.join()
-
-    def __enter__(self):
-        self.start()
-        return self
-
-    def __exit__(self, *_):
-        self.stop()
-
-    def stop_env_loop(self):
-        self._step_env = False
-
-    def environment_step_loop(self):
-        rate_limiter = SimpleFrameRate()
-        self._step_env = True
-        while self._step_env:
-            if self._exit_requested:
-                self._step_env = False
-                break
-            displacement = self.next_action()
-            action = dict(
-                LimitedTQuartRelDictType(tquart=np.concatenate([displacement.translation(), displacement.rotation_q()]))
-            )
-
-            action.update(GripperDictType(gripper=self._grp_pos))
-
-            with self._env_lock:
-                self._env.step(action)
-            rate_limiter(RECORD_FPS)
-
-
-def input_loop(env_rel, action_server: UDPViveActionServer, camera_set: RealSenseCameraSet | None):
-    while True:
-        i = input("press enter to start recording, 'q' to stop, 'h' to move to home: ")
-        match i:
-            case "q":
-                sys.exit(0)
-            case "h":
-                env_rel.unwrapped.robot.move_home()
-            case _:
-                # record videos
-                if camera_set is not None:
-                    video_path = env_rel.path / "videos"
-                    video_path.mkdir(parents=True, exist_ok=True)
-                    camera_set.record_video(video_path, f"{env_rel.key}_{env_rel.timestamp}_{env_rel.episode_count}")
-
-                thread = threading.Thread(target=action_server.environment_step_loop)
-                thread.start()
-                input("Robot is being stepped, press enter to finish and save episode.")
-                print("stopping")
-                action_server.stop_env_loop()
-                thread.join()
-                env_rel.reset()
-                print("videos saved!")
-
-
-def main():
-    if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-        user, pw = load_creds_fr3_desk()
-        resource_manger = FCI(Desk(ROBOT_IP, user, pw), unlock=False, lock_when_done=False, guiding_mode_when_done=True)
-    else:
-        resource_manger = DummyResourceManager()
-
-    with resource_manger:
-
-        if ROBOT_INSTANCE == RobotInstance.HARDWARE:
-            camera_set = default_realsense(CAMERA_DICT)
-            env_rel = fr3_hw_env(
-                ip=ROBOT_IP,
-                camera_set=camera_set,
-                # collision_guard="lab",
-                robot_cfg=default_fr3_hw_robot_cfg(async_control=True),
-                control_mode=ControlMode.CARTESIAN_TQuart,
-                # control_mode=ControlMode.JOINTS,
-                gripper_cfg=default_fr3_hw_gripper_cfg(async_control=True),
-                max_relative_movement=(0.5, np.deg2rad(90)),
-                # max_relative_movement=np.deg2rad(20),
-                relative_to=RelativeTo.CONFIGURED_ORIGIN,
-            )
-        else:
-            env_rel = fr3_sim_env(
-                control_mode=ControlMode.CARTESIAN_TQuart,
-                # control_mode=ControlMode.JOINTS,
-                robot_cfg=default_fr3_sim_robot_cfg(),
-                collision_guard=False,
-                mjcf="lab",
-                gripper_cfg=default_fr3_sim_gripper_cfg(),
-                # camera_set_cfg=default_mujoco_cameraset_cfg(),
-                max_relative_movement=0.5,
-                relative_to=RelativeTo.CONFIGURED_ORIGIN,
-            )
-            env_rel.get_wrapper_attr("sim").open_gui()
-
-        if not DEBUG:
-            env_rel = StorageWrapperHDF5(env_rel, path=STORAGE_PATH, camera_set=camera_set)
-            # ip_secondary = "192.168.102.1"
-            # with Desk.fci(ip_secondary, user, pw):
-            #     f = rcsss.hw.FR3(ip_secondary)
-            #     config = rcsss.hw.FR3Config()
-            #     f.set_parameters(config)
-            #     env_rel.get_wrapper_attr("log_files")({
-            #         "camrobot_cart.txt": str(f.get_cartesian_position()),
-            #         "camrobot_joints.txt": str(f.get_joint_position()),
-            #     })
-
-        env_rel.reset()
-
-        with env_rel, UDPViveActionServer(VIVE_HOST, VIVE_PORT, env_rel) as action_server:
-            if not DEBUG:
-                input_loop(env_rel, action_server, camera_set)
-            else:
-                action_server.environment_step_loop()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/python/rcsss/envs/wrappers.py b/python/rcsss/envs/wrappers.py
deleted file mode 100644
index 4824419c..00000000
--- a/python/rcsss/envs/wrappers.py
+++ /dev/null
@@ -1,395 +0,0 @@
-import os
-import subprocess
-from datetime import datetime
-from pathlib import Path
-from typing import Any, SupportsFloat
-
-import gymnasium as gym
-import h5py
-import numpy as np
-from PIL import Image
-from rcsss.camera.hw import BaseHardwareCameraSet
-
-
-class StorageWrapperNumpy(gym.Wrapper):
-    # TODO: this should also record the instruction
-    FILE = "episode_{}.npz"
-    FOLDER = "experiment_{}"
-
-    def __init__(
-        self,
-        env: gym.Env,
-        path: str,
-        instruction: str | None = None,
-        description: str | None = None,
-        camera_set: BaseHardwareCameraSet | None = None,
-    ):
-        super().__init__(env)
-        self.episode_count = 0
-        self.step_count = 0
-        self.data = {}
-        self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-        self.camera_set = camera_set
-        self.prev_obs: dict | None = None
-
-        # make folders
-        self.path = Path(path) / self.FOLDER.format(self.timestamp)
-        Path(self.path).mkdir(parents=True, exist_ok=False)
-        if description is None:
-            # write a small description from input into file
-            description = input("Please enter a description for this experiment: ")
-        with open(self.path / "description.txt", "w") as f:
-            f.write(description)
-        if instruction is None:
-            # write instruction from input into file
-            instruction = input("Instruction: ")
-        self.language_instruction = str(instruction)
-        self.data["language_instruction"] = self.language_instruction
-        # get git commit id
-        os.system(f'git log --format="%H" -n 1 > {os.path.join(str(self.path), "git_id.txt")}')
-        # submodule git ids
-        os.system(f'git submodule status > {os.path.join(str(self.path), "git_id_submodules.txt")}')
-        # get git diff
-        os.system(f'git diff --submodule=diff > {os.path.join(str(self.path), "git_diff.txt")}')
-
-    def flush(self):
-        """writes data to disk"""
-        if len(self.data) == 0 or self.step_count == 0:
-            return
-        print(self.data.keys())
-        np.savez(self.path / self.FILE.format(self.episode_count), **self.data)
-        if self.camera_set is not None and self.camera_set.recording_ongoing():
-            self.camera_set.stop_video()
-
-        self.episode_count += 1
-        self.data = {}
-
-    @staticmethod
-    def create_gif(path: str, episode: int, key: str, frame_interval_s: float = 0.5):
-        data = np.load(path)
-
-        if (
-            "observation" in data
-            and "frames" in data["observation"]
-            and key in data["observation"]["frames"]
-            and "rgb" in data["observation"]["frames"][key]
-        ):
-            gif_path = Path(path) / "gifs"
-            gif_path.mkdir(parents=True, exist_ok=True)
-            imgs = []
-            previous_timestamp = 0
-            d = data["observation"]["frames"][key]["rgb"]
-            for idx in range(min(len(d), len(data["timestamp"]))):
-                # skip images that have timestamps closer together than 0.5s
-                img = d[idx]
-                if data["timestamp"][idx] - previous_timestamp < frame_interval_s:
-                    continue
-                previous_timestamp = data["timestamp"][idx]
-                imgs.append(Image.fromarray(img))
-            imgs[0].save(
-                gif_path / "episode_{}_{}.gif".format(episode, key),
-                save_all=True,
-                append_images=imgs[1:],
-                duration=frame_interval_s * 1000,
-                loop=0,
-            )
-        else:
-            if "observation" in data and "frames" in data["observation"] and key in data["observation"]["frames"]:
-                msg = f"Key {key} not found in {data['observation']['frames'].keys()}"
-            else:
-                msg = f"Key {key} not found in data"
-            raise ValueError(msg)
-
-    def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
-        obs, reward, terminated, truncated, info = super().step(action)
-        # write obs and action into data
-        act_obs = {"action": action, "observation": self.prev_obs, "timestamp": datetime.now().timestamp()}
-        # delay observation by one time step to ensure that the observation leads to the action (and not like in gym env)
-        self.prev_obs = obs
-        self.data["language_instruction"] = self.language_instruction
-        for key, value in act_obs.items():
-            if key not in self.data:
-                self.data[key] = np.expand_dims(value, axis=0)
-            else:
-                self.data[key] = np.concatenate([self.data[key], np.expand_dims(value, axis=0)], axis=0)
-        self.step_count += 1
-        return obs, reward, terminated, truncated, info
-
-    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
-        self.flush()
-        self.step_count = 0
-        self.prev_obs = None
-        re = super().reset(seed=seed, options=options)
-        self.prev_obs = re[0]
-        return re
-
-    def close(self):
-        self.flush()
-        return super().close()
-
-    @property
-    def logger_dir(self):
-        return self.path
-
-    def log_files(self, file2content: dict[str, str]):
-        for fn, content in file2content.items():
-            with open(self.path / fn, "w") as f:
-                f.write(content)
-
-
-# TODO: gifs should not be created after each episode, but there should rather be tool
-# to create them from a dataset, how about video?
-class StorageWrapperHDF5(gym.Wrapper):
-    FILE = "data.h5"
-
-    def __init__(
-        self,
-        env: gym.Env,
-        path: str,
-        instruction: str | None = None,
-        description: str | None = None,
-        key: str | None = None,
-        camera_set: BaseHardwareCameraSet | None = None,
-    ):
-        super().__init__(env)
-        self.episode_count = 0
-        self.step_count = 0
-        self.timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-        self.prev_obs: dict | None = None
-        self.datasets = {}
-        self.camera_set = camera_set
-
-        # Make folders
-        self.path = Path(path)
-        Path(self.path).mkdir(parents=True, exist_ok=True)
-        if description is None:
-            # Write a small description from input into file
-            description = input("Please enter a description for this experiment: ")
-        self.description = description
-
-        # with open(self.path / "description.txt", "w") as f:
-        #     f.write(self.description)
-
-        if instruction is None:
-            # Write instruction from input into file
-            instruction = input("Instruction: ")
-        self.language_instruction = str(instruction)
-
-        if key is None:
-            # Write instruction from input into file
-            key = input("key for hdf5: ")
-        self.key = key
-        # Open HDF5 file in append mode
-        self.h5file = h5py.File(self.path / self.FILE, "a")
-        # Check if instruction group exists
-        if self.key in self.h5file:
-            self.instruction_group = self.h5file[self.key]
-        else:
-            self.instruction_group = self.h5file.create_group(self.key)
-
-    def append_to_hdf5(self, group, data_dict, index):
-        for key, value in data_dict.items():
-            if isinstance(value, dict):
-                # Handle subgroup
-                subgroup = group.create_group(key) if key not in group else group[key]
-                self.append_to_hdf5(subgroup, value, index)
-            else:
-                # Handle dataset
-                dataset_name = key
-                full_dataset_path = group.name + "/" + dataset_name
-                if full_dataset_path not in self.datasets:
-                    # First time seeing this dataset
-                    # Determine dtype
-                    if isinstance(value, str):
-                        # Variable-length string
-                        dtype = h5py.string_dtype(encoding="utf-8")
-                        shape = ()
-                    elif np.isscalar(value):
-                        # Numeric scalar
-                        dtype = type(value)
-                        shape = ()
-                    elif isinstance(value, np.ndarray):
-                        # Numpy array
-                        v = value
-                        dtype = value.dtype
-                        shape = value.shape
-                    else:
-                        # Other types, try to convert to numpy array
-                        try:
-                            v = np.array(value)
-                            dtype = v.dtype
-                            shape = v.shape
-                        except Exception as e:
-                            msg = f"Unsupported data type for key '{key}': {type(value)}"
-                            raise ValueError(msg) from e
-                    # Create dataset
-                    initial_shape = (index + 1, *shape)
-                    maxshape = (None, *shape)
-                    dataset = group.create_dataset(
-                        dataset_name,
-                        shape=initial_shape,
-                        maxshape=maxshape,
-                        chunks=True,
-                        dtype=dtype,
-                        compression="gzip",
-                    )
-                    self.datasets[full_dataset_path] = dataset
-                else:
-                    dataset = self.datasets[full_dataset_path]
-                    if dataset.shape[0] <= index:
-                        new_size = index + 1
-                        dataset.resize(new_size, axis=0)
-                # Store value
-                if isinstance(value, str) or np.isscalar(value):
-                    dataset[index] = value
-                else:
-                    dataset[index, ...] = v
-
-    def flush(self):
-        """Writes data to disk."""
-        if self.step_count == 0:
-            return
-        # Flush HDF5 file
-        self.h5file.flush()
-        # Stop camera recording if applicable
-        if self.camera_set is not None and self.camera_set.recording_ongoing():
-            self.camera_set.stop_video()
-        # Reset datasets for the next episode
-        self.datasets = {}
-        self.episode_count += 1
-
-    @staticmethod
-    def create_gif(
-        h5file_path: str,
-        key: str,
-        episode_name: str,
-        camera_id: str,
-        frame_interval_s: float = 0.5,
-        output_folder: str | None = None,
-    ):
-
-        with h5py.File(h5file_path, "r") as data:
-            if (
-                key in data
-                and episode_name in data[key]
-                and "observation" in data[key][episode_name]
-                and "frames" in data[key][episode_name]["observation"]
-                and camera_id in data[key][episode_name]["observation"]["frames"]
-                and "rgb" in data[key][episode_name]["observation"]["frames"][camera_id]
-            ):
-                if output_folder is None:
-                    output_folder = os.path.join(os.path.dirname(h5file_path), "gifs")
-                os.makedirs(output_folder, exist_ok=True)
-                gif_filename = f"{key}_{episode_name}_{camera_id}.gif"
-                gif_filepath = os.path.join(output_folder, gif_filename)
-                imgs = []
-                previous_timestamp = 0
-                d = data["observation"]["frames"][camera_id]["rgb"]
-                for idx in range(min(len(d), len(data["timestamp"]))):
-                    # skip images that have timestamps closer together than 0.5s
-                    img = d[idx]
-                    if data["timestamp"][idx] - previous_timestamp < frame_interval_s:
-                        continue
-                    previous_timestamp = data["timestamp"][idx]
-                    imgs.append(Image.fromarray(img))
-                imgs[0].save(
-                    gif_filepath,
-                    save_all=True,
-                    append_images=imgs[1:],
-                    duration=frame_interval_s * 1000,
-                    loop=0,
-                )
-            else:
-                msg = f"camera id {camera_id} not found in data"
-                raise ValueError(msg)
-
-    def step(self, action: dict) -> tuple[Any, SupportsFloat, bool, bool, dict[str, Any]]:
-        obs, reward, terminated, truncated, info = super().step(action)
-        # Delay observation by one time step
-        act_obs = {"action": action, "observation": self.prev_obs, "timestamp": datetime.now().timestamp()}
-        self.prev_obs = obs  # Update prev_obs for next step
-        # Append data to HDF5
-        self.append_to_hdf5(self.episode_group, act_obs, self.step_count)
-        self.step_count += 1
-        return obs, reward, terminated, truncated, info
-
-    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Any, dict[str, Any]]:
-        self.flush()
-        self.step_count = 0
-        self.prev_obs = None
-        # Create a new episode group
-        episode_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        self.episode_group = self.instruction_group.create_group(episode_name)
-        self.datasets = {}
-        # Get git metadata
-        try:
-            git_diff = subprocess.check_output(["git", "diff", "--submodule=diff"]).decode("utf-8")
-            git_commit_id = subprocess.check_output(["git", "log", "--format=%H", "-n", "1"]).decode("utf-8").strip()
-            git_submodule_status = subprocess.check_output(["git", "submodule", "status"]).decode("utf-8")
-        except subprocess.CalledProcessError:
-            git_diff = ""
-            git_commit_id = ""
-            git_submodule_status = ""
-        # Store git info as attributes
-        self.episode_group.attrs["git_diff"] = git_diff
-        self.episode_group.attrs["git_commit_id"] = git_commit_id
-        self.episode_group.attrs["git_submodule_status"] = git_submodule_status
-        # Also store description and language instruction
-        self.episode_group.attrs["description"] = self.description
-        self.episode_group.attrs["language_instruction"] = self.language_instruction
-        result = super().reset(seed=seed, options=options)
-        self.prev_obs = result[0]  # Initialize prev_obs
-        return result
-
-    def close(self):
-        self.flush()
-        self.h5file.close()
-        return super().close()
-
-    @property
-    def logger_dir(self):
-        return self.path
-
-    def log_files(self, file2content: dict[str, str]):
-        for fn, content in file2content.items():
-            with open(self.path / fn, "w") as f:
-                f.write(content)
-
-
-def listdict2dictlist(LD):
-    return {k: [dic[k] for dic in LD] for k in LD[0]}
-
-
-class RHCWrapper(gym.Wrapper):
-    """
-    Performs receding horizon control. The policy returns `pred_horizon` actions and
-    we execute `exec_horizon` of them.
-    """
-
-    def __init__(self, env: gym.Env, exec_horizon: int):
-        super().__init__(env)
-        self.exec_horizon = exec_horizon
-
-    def step(self, actions):
-        if self.exec_horizon == 1 and len(actions.shape) == 1:
-            actions = actions[None]
-        assert len(actions) >= self.exec_horizon
-        rewards = []
-        observations = []
-        infos = []
-
-        for i in range(self.exec_horizon):
-            # obs, reward, done, trunc, info = self.env.step(actions[i])
-            obs, reward, done, trunc, info = self.env.step({"xyzrpy": actions[i, :6], "gripper": actions[i, 6]})
-            observations.append(obs)
-            rewards.append(reward)
-            infos.append(info)
-
-            if done or trunc:
-                break
-
-        infos = listdict2dictlist(infos)
-        infos["rewards"] = rewards
-        infos["observations"] = observations
-
-        return obs, np.sum(rewards), done, trunc, infos

From 036a9eeb342cec9a5008df77ada7d5513f3dc211 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 12:36:21 +0200
Subject: [PATCH 50/56] fix: type linting issues

---
 python/rcsss/control/fr3_desk.py | 4 ++--
 python/rcsss/envs/base.py        | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index 16fc2265..52790174 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -303,8 +303,8 @@ def take_control(self, force: bool = False) -> bool:
         if active.id != "" and self._token.id == "" and os.path.exists(token_path):
             with open(token_path, "r") as f:
                 content = f.read()
-            content = content.split("/n")
-            self._token = Token(content[0], content[1], content[2])
+            content_splitted = content.split("/n")
+            self._token = Token(*content_splitted)
 
         # we already have control
         if active.id != "" and self._token.id == active.id:
diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 7ad8da9c..92159f46 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -386,6 +386,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
         elif self.unwrapped.get_control_mode() == ControlMode.CARTESIAN_TRPY and self.trpy_key in action:
             assert isinstance(self._origin, common.Pose), "Invalid origin type given the control mode."
             assert isinstance(self.max_mov, tuple)
+            assert isinstance(self._last_action, common.Pose)
             pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key])
 
             if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
@@ -429,6 +430,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
         elif self.unwrapped.get_control_mode() == ControlMode.CARTESIAN_TQuart and self.tquart_key in action:
             assert isinstance(self._origin, common.Pose), "Invalid origin type given the control mode."
             assert isinstance(self.max_mov, tuple)
+            assert isinstance(self._last_action, common.Pose)
             pose_space = cast(gym.spaces.Box, get_space(TQuartDictType).spaces[self.tquart_key])
 
             if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:

From 2529355b0371ed2a90d7405b97b175f26058c730 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 12:38:14 +0200
Subject: [PATCH 51/56] style: fixed python format

---
 python/rcsss/envs/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 92159f46..30caf73d 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -7,7 +7,6 @@
 
 import gymnasium as gym
 import numpy as np
-from python.rcsss._core.common import Robot
 from rcsss import common, sim
 from rcsss.camera.interface import BaseCameraSet
 from rcsss.envs.space_utils import (
@@ -19,6 +18,8 @@
     get_space_keys,
 )
 
+from python.rcsss._core.common import Robot
+
 _logger = logging.getLogger()
 
 

From 5fc6195033a861e6e3e698f9fc0a4d371340022a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 13:07:48 +0200
Subject: [PATCH 52/56] fix: wrong automatic import and logger

---
 python/rcsss/envs/base.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 30caf73d..dfc28a0e 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -8,6 +8,7 @@
 import gymnasium as gym
 import numpy as np
 from rcsss import common, sim
+from rcsss._core.common import Robot
 from rcsss.camera.interface import BaseCameraSet
 from rcsss.envs.space_utils import (
     ActObsInfoWrapper,
@@ -18,9 +19,7 @@
     get_space_keys,
 )
 
-from python.rcsss._core.common import Robot
-
-_logger = logging.getLogger()
+_logger = logging.getLogger(__name__)
 
 
 class TRPYDictType(RCSpaceType):

From c1aa70dbd85a72034227fad7448006b2eea38498 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 13:58:51 +0200
Subject: [PATCH 53/56] fix(env): type assertion and improved tests

- fixed type assertion location (noticed by tests)
- optimized tests by removing camera in most cases
---
 python/rcsss/envs/base.py     |  4 ++--
 python/tests/test_sim_envs.py | 40 +++++++++++++++++------------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index dfc28a0e..c38f7f6b 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -386,7 +386,6 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
         elif self.unwrapped.get_control_mode() == ControlMode.CARTESIAN_TRPY and self.trpy_key in action:
             assert isinstance(self._origin, common.Pose), "Invalid origin type given the control mode."
             assert isinstance(self.max_mov, tuple)
-            assert isinstance(self._last_action, common.Pose)
             pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key])
 
             if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
@@ -400,6 +399,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                 )
                 self._last_action = clipped_pose_offset
             else:
+                assert isinstance(self._last_action, common.Pose)
                 pose_diff = (
                     common.Pose(
                         translation=action[self.trpy_key][:3],
@@ -430,7 +430,6 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
         elif self.unwrapped.get_control_mode() == ControlMode.CARTESIAN_TQuart and self.tquart_key in action:
             assert isinstance(self._origin, common.Pose), "Invalid origin type given the control mode."
             assert isinstance(self.max_mov, tuple)
-            assert isinstance(self._last_action, common.Pose)
             pose_space = cast(gym.spaces.Box, get_space(TQuartDictType).spaces[self.tquart_key])
 
             if self.relative_to == RelativeTo.LAST_STEP or self._last_action is None:
@@ -444,6 +443,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
                 )
                 self._last_action = clipped_pose_offset
             else:
+                assert isinstance(self._last_action, common.Pose)
                 pose_diff = (
                     common.Pose(
                         translation=action[self.tquart_key][:3],
diff --git a/python/tests/test_sim_envs.py b/python/tests/test_sim_envs.py
index 83baa811..9806d9ec 100644
--- a/python/tests/test_sim_envs.py
+++ b/python/tests/test_sim_envs.py
@@ -108,10 +108,10 @@ def test_non_zero_action_trpy(self, cfg):
         obs, _, _, _, info = env.step(non_zero_action)
         self.assert_no_pose_change(info, expected_obs, obs)
 
-    def test_relative_zero_action_trpy(self, cfg, gripper_cfg, cam_cfg):
+    def test_relative_zero_action_trpy(self, cfg, gripper_cfg):
         # env creation
         env = fr3_sim_env(
-            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=cam_cfg, max_relative_movement=0.5
+            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=None, max_relative_movement=0.5
         )
         obs_initial, _ = env.reset()
         # action to be performed
@@ -120,10 +120,10 @@ def test_relative_zero_action_trpy(self, cfg, gripper_cfg, cam_cfg):
         obs, _, _, _, info = env.step(zero_action)
         self.assert_no_pose_change(info, obs_initial, obs)
 
-    def test_relative_non_zero_action(self, cfg, gripper_cfg, cam_cfg):
+    def test_relative_non_zero_action(self, cfg, gripper_cfg):
         # env creation
         env = fr3_sim_env(
-            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=cam_cfg, max_relative_movement=0.5
+            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=None, max_relative_movement=0.5
         )
         obs_initial, _ = env.reset()
         # action to be performed
@@ -135,13 +135,13 @@ def test_relative_non_zero_action(self, cfg, gripper_cfg, cam_cfg):
         obs, _, _, _, info = env.step(non_zero_action)
         self.assert_no_pose_change(info, obs_initial, expected_obs)
 
-    def test_collision_trpy(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_trpy(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by sim
         """
         # env creation
         env = fr3_sim_env(
-            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=cam_cfg, max_relative_movement=None
+            ControlMode.CARTESIAN_TRPY, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=None, max_relative_movement=None
         )
         obs, _ = env.reset()
         # an obvious below ground collision action
@@ -152,7 +152,7 @@ def test_collision_trpy(self, cfg, gripper_cfg, cam_cfg):
         obs, _, _, _, info = env.step(collision_action)
         self.assert_collision(info)
 
-    def test_collision_guard_trpy(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_guard_trpy(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by the CollisionGuard
         """
@@ -162,7 +162,7 @@ def test_collision_guard_trpy(self, cfg, gripper_cfg, cam_cfg):
             cfg,
             gripper_cfg=gripper_cfg,
             collision_guard=True,
-            camera_set_cfg=cam_cfg,
+            camera_set_cfg=None,
             max_relative_movement=None,
         )
         obs, _ = env.reset()
@@ -236,13 +236,13 @@ def test_zero_action_tquart(self, cfg):
         obs, _, _, _, info = env.step(zero_action)
         self.assert_no_pose_change(info, obs_initial, obs)
 
-    def test_relative_zero_action_tquart(self, cfg, gripper_cfg, cam_cfg):
+    def test_relative_zero_action_tquart(self, cfg, gripper_cfg):
         # env creation
         env_rel = fr3_sim_env(
             ControlMode.CARTESIAN_TQuart,
             cfg,
             gripper_cfg=gripper_cfg,
-            camera_set_cfg=cam_cfg,
+            camera_set_cfg=None,
             max_relative_movement=0.5,
         )
         obs_initial, _ = env_rel.reset()
@@ -251,7 +251,7 @@ def test_relative_zero_action_tquart(self, cfg, gripper_cfg, cam_cfg):
         obs, _, _, _, info = env_rel.step(zero_rel_action)
         self.assert_no_pose_change(info, obs_initial, obs)
 
-    def test_collision_tquart(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_tquart(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by sim
         """
@@ -260,7 +260,7 @@ def test_collision_tquart(self, cfg, gripper_cfg, cam_cfg):
             ControlMode.CARTESIAN_TQuart,
             cfg,
             gripper_cfg=gripper_cfg,
-            camera_set_cfg=cam_cfg,
+            camera_set_cfg=None,
             max_relative_movement=None,
         )
         obs, _ = env.reset()
@@ -272,7 +272,7 @@ def test_collision_tquart(self, cfg, gripper_cfg, cam_cfg):
         _, _, _, _, info = env.step(collision_action)
         self.assert_collision(info)
 
-    def test_collision_guard_tquart(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_guard_tquart(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by the CollisionGuard
         """
@@ -282,7 +282,7 @@ def test_collision_guard_tquart(self, cfg, gripper_cfg, cam_cfg):
             cfg,
             gripper_cfg=gripper_cfg,
             collision_guard=True,
-            camera_set_cfg=cam_cfg,
+            camera_set_cfg=None,
             max_relative_movement=None,
         )
         obs, _ = env.reset()
@@ -346,13 +346,13 @@ def test_non_zero_action_joints(self, cfg):
         assert info["is_sim_converged"]
         assert np.allclose(obs["joints"], non_zero_action["joints"], atol=0.01, rtol=0)
 
-    def test_collision_joints(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_joints(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by the CollisionGuard
         """
         # env creation
         env = fr3_sim_env(
-            ControlMode.JOINTS, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=cam_cfg, max_relative_movement=None
+            ControlMode.JOINTS, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=None, max_relative_movement=None
         )
         env.reset()
         # the below action is a test_case where there is an obvious collision regardless of the gripper action
@@ -361,7 +361,7 @@ def test_collision_joints(self, cfg, gripper_cfg, cam_cfg):
         _, _, _, _, info = env.step(collision_act)
         self.assert_collision(info)
 
-    def test_collision_guard_joints(self, cfg, gripper_cfg, cam_cfg):
+    def test_collision_guard_joints(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by sim
         """
@@ -371,7 +371,7 @@ def test_collision_guard_joints(self, cfg, gripper_cfg, cam_cfg):
             cfg,
             gripper_cfg=gripper_cfg,
             collision_guard=True,
-            camera_set_cfg=cam_cfg,
+            camera_set_cfg=None,
             max_relative_movement=None,
         )
         env.reset()
@@ -387,13 +387,13 @@ def test_collision_guard_joints(self, cfg, gripper_cfg, cam_cfg):
         # assure that the robot did not move
         assert np.allclose(p1, p2)
 
-    def test_relative_zero_action_joints(self, cfg, gripper_cfg, cam_cfg):
+    def test_relative_zero_action_joints(self, cfg, gripper_cfg):
         """
         Check that an obvious collision is detected by the CollisionGuard
         """
         # env creation
         env = fr3_sim_env(
-            ControlMode.JOINTS, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=cam_cfg, max_relative_movement=0.5
+            ControlMode.JOINTS, cfg, gripper_cfg=gripper_cfg, camera_set_cfg=None, max_relative_movement=0.5
         )
         obs_initial, _ = env.reset()
         act = JointsDictType(joints=np.array([0, 0, 0, 0, 0, 0, 0], dtype=np.float32))

From 9ce0c9f463effc9a942029a8d064678cd3cf989a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 14:08:38 +0200
Subject: [PATCH 54/56] refactor(cli): remove remaing config functions

---
 python/rcsss/cli/main.py | 59 ----------------------------------------
 1 file changed, 59 deletions(-)

diff --git a/python/rcsss/cli/main.py b/python/rcsss/cli/main.py
index fa467625..edb5eb8a 100644
--- a/python/rcsss/cli/main.py
+++ b/python/rcsss/cli/main.py
@@ -1,6 +1,5 @@
 import logging
 from contextlib import ExitStack
-from pathlib import Path
 from time import sleep
 from typing import Annotated, Optional
 
@@ -8,9 +7,7 @@
 import rcsss
 import rcsss.control.fr3_desk
 import typer
-from PIL import Image
 from rcsss.camera.realsense import RealSenseCameraSet
-from rcsss.config import create_sample_config_yaml, read_config_yaml
 from rcsss.control.record import PoseList
 from rcsss.control.utils import load_creds_fr3_desk
 from rcsss.envs.factories import get_urdf_path
@@ -21,16 +18,6 @@
 main_app = typer.Typer(help="CLI tool for the Robot Control Stack (RCS).")
 
 
-@main_app.command()
-def sample_config(
-    path: Annotated[
-        str, typer.Option("-p", help="Path to where the default config file should be saved")
-    ] = "config.yaml",
-):
-    """Creates a sample yaml config file"""
-    create_sample_config_yaml(path)
-
-
 # REALSENSE CLI
 realsense_app = typer.Typer()
 main_app.add_typer(
@@ -53,52 +40,6 @@ def serials():
         logger.info("  %s: %s", device.product_line, device.serial)
 
 
-@realsense_app.command()
-def test(
-    path: Annotated[str, typer.Argument(help="Path to the config file")],
-):
-    """Tests all configured and connected realsense by saving the current picture."""
-    cfg = read_config_yaml(path)
-    assert cfg.hw.camera_type == "realsense", "Only realsense cameras are supported for this test."
-    assert cfg.hw.camera_config is not None, "Camera config is not set."
-    assert cfg.hw.camera_config.realsense_config is not None, "Realsense config is not set."
-    cs = RealSenseCameraSet(cfg.hw.camera_config.realsense_config)
-    cs.warm_up()
-    testdir = Path(cfg.hw.camera_config.realsense_config.record_path)
-    frame_set = cs.poll_frame_set()
-    for device_str in cfg.hw.camera_config.realsense_config.name_to_identifier:
-        assert device_str in frame_set.frames
-        frame = frame_set.frames[device_str]
-        if frame.camera.color is not None:
-            im = Image.fromarray(frame.camera.color.data, mode="RGB")
-            im.save(testdir / f"test_img_{device_str}.png")
-        if frame.camera.depth is not None:
-            im = Image.fromarray(frame.camera.depth.data)
-            im.save(testdir / f"test_depth_{device_str}.png")
-        if frame.camera.ir is not None:
-            im = Image.fromarray(frame.camera.ir.data)
-            im.save(testdir / f"test_ir_{device_str}.png")
-        if frame.imu is not None:
-            logger.info("IMU data: %s %s", frame.imu.accel, frame.imu.gyro)
-
-
-@realsense_app.command()
-def test_record(
-    path: Annotated[str, typer.Argument(help="Path to the config file")],
-    n_frames: Annotated[int, typer.Argument(help="Name of the camera that should be tested")] = 100,
-):
-    """Tests all configured and connected realsense by saving the current picture."""
-    cfg = read_config_yaml(path)
-    assert cfg.hw.camera_type == "realsense", "Only realsense cameras are supported for this test."
-    assert cfg.hw.camera_config is not None, "Camera config is not set."
-    assert cfg.hw.camera_config.realsense_config is not None, "Realsense config is not set."
-    cs = RealSenseCameraSet(cfg.hw.camera_config.realsense_config)
-    cs.start(warm_up=True)
-    while cs.buffer_size() < n_frames:
-        sleep(1 / cfg.hw.camera_config.realsense_config.frame_rate)
-    cs.stop()
-
-
 # FR3 CLI
 fr3_app = typer.Typer()
 main_app.add_typer(

From f7a34090855a682c9028ad92a1694435251913a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 14:10:34 +0200
Subject: [PATCH 55/56] fix(fr3 desk): error when circle is not in event dict

---
 python/rcsss/control/fr3_desk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/rcsss/control/fr3_desk.py b/python/rcsss/control/fr3_desk.py
index 52790174..f2826ddd 100644
--- a/python/rcsss/control/fr3_desk.py
+++ b/python/rcsss/control/fr3_desk.py
@@ -339,7 +339,7 @@ def take_control(self, force: bool = False) -> bool:
             ) as websocket:
                 while True:
                     event: dict = json_module.loads(websocket.recv(timeout))
-                    if event["circle"]:
+                    if event.get("circle", False):
                         break
         self._token = Token(str(response["id"]), self._username, response["token"])
         with open(token_path, "w") as f:

From 28a706a5741b30bc04ef3275490197f03f932bcb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 9 May 2025 14:30:59 +0200
Subject: [PATCH 56/56] chore: models version bump

- updated models version
- adapted naming of models repo: eye-in-hand to wrist, removed color ref in geoms, box-joint to box_joint
---
 CMakeLists.txt                    |  2 +-
 python/examples/fr3.py            |  2 +-
 python/examples/grasp_demo.py     |  2 +-
 python/examples/grasp_demo_lab.py |  2 +-
 python/rcsss/envs/factories.py    | 10 +++++-----
 python/rcsss/envs/sim.py          | 10 ++++------
 python/rcsss/envs/utils.py        |  4 ++--
 7 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ce55978e..3b98d648 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -98,7 +98,7 @@ FetchContent_MakeAvailable(libfranka rl pybind11)
 if (${INCLUDE_UTN_MODELS})
     if (DEFINED GITLAB_MODELS_TOKEN)
         include(download_scenes)
-        set(ref bfdd70ae189ed3a925bf1c16d29162e847a21d56)
+        set(ref 055a6fdab480dde8443030236012f76d9be90918)
         FetchContent_Declare(
             scenes
             URL         "https://gitos.rrze.fau.de/api/v4/projects/1100/repository/archive?path=scenes&sha=${ref}"
diff --git a/python/examples/fr3.py b/python/examples/fr3.py
index 80861f85..62d0f853 100644
--- a/python/examples/fr3.py
+++ b/python/examples/fr3.py
@@ -82,7 +82,7 @@ def main():
             # add camera to have a rendering gui
             cameras = {
                 "default_free": SimCameraConfig(identifier="", type=int(CameraType.default_free)),
-                "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
+                "wrist": SimCameraConfig(identifier="wrist_0", type=int(CameraType.fixed)),
             }
             cam_cfg = SimCameraSetConfig(cameras=cameras, resolution_width=1280, resolution_height=720, frame_rate=20)
             camera_set = SimCameraSet(simulation, cam_cfg)  # noqa: F841
diff --git a/python/examples/grasp_demo.py b/python/examples/grasp_demo.py
index 7c3ff1ee..89653a46 100644
--- a/python/examples/grasp_demo.py
+++ b/python/examples/grasp_demo.py
@@ -92,7 +92,7 @@ def main():
     print(env.unwrapped.robot.get_cartesian_position().translation())  # type: ignore
     # assert False
     controller = PickUpDemo(env)
-    controller.pickup("yellow_box_geom")
+    controller.pickup("box_geom")
 
 
 if __name__ == "__main__":
diff --git a/python/examples/grasp_demo_lab.py b/python/examples/grasp_demo_lab.py
index d3bb6c62..d4817132 100644
--- a/python/examples/grasp_demo_lab.py
+++ b/python/examples/grasp_demo_lab.py
@@ -89,7 +89,7 @@ def main():
     )
     env.reset()
     controller = PickUpDemo(env)
-    controller.pickup("yellow_box_geom")
+    controller.pickup("box_geom")
 
 
 if __name__ == "__main__":
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 95c413fa..90a63b41 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -248,8 +248,8 @@ def __call__(  # type: ignore
             resolution = (256, 256)
 
         cameras = {
-            "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
-            "bird_eye": SimCameraConfig(identifier="bird-eye-cam", type=int(CameraType.fixed)),
+            "wrist": SimCameraConfig(identifier="wrist_0", type=int(CameraType.fixed)),
+            "bird_eye": SimCameraConfig(identifier="bird_eye_cam", type=int(CameraType.fixed)),
             "side": SimCameraConfig(identifier="side_view", type=int(CameraType.fixed)),
             "right_side": SimCameraConfig(identifier="right_side", type=int(CameraType.fixed)),
             "left_side": SimCameraConfig(identifier="left_side", type=int(CameraType.fixed)),
@@ -278,7 +278,7 @@ def __call__(  # type: ignore
         if resolution is None:
             resolution = (256, 256)
 
-        cameras = {"wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed))}
+        cameras = {"wrist": SimCameraConfig(identifier="wrist_0", type=int(CameraType.fixed))}
 
         camera_cfg = SimCameraSetConfig(
             cameras=cameras,
@@ -326,8 +326,8 @@ def __call__(  # type: ignore
             resolution = (256, 256)
 
         cameras = {
-            "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
-            "side": SimCameraConfig(identifier="eye-in-hand_1", type=int(CameraType.fixed)),
+            "wrist": SimCameraConfig(identifier="wrist_0", type=int(CameraType.fixed)),
+            "side": SimCameraConfig(identifier="wrist_1", type=int(CameraType.fixed)),
         }
 
         camera_cfg = SimCameraSetConfig(
diff --git a/python/rcsss/envs/sim.py b/python/rcsss/envs/sim.py
index 3ac7f498..3112ae7e 100644
--- a/python/rcsss/envs/sim.py
+++ b/python/rcsss/envs/sim.py
@@ -192,7 +192,7 @@ def reset(
         pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
         pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
 
-        self.sim.data.joint("yellow-box-joint").qpos[:3] = [pos_x, pos_y, pos_z]
+        self.sim.data.joint("box_joint").qpos[:3] = [pos_x, pos_y, pos_z]
 
         return obs, info
 
@@ -212,14 +212,12 @@ def step(self, action: dict[str, Any]):
         obs, reward, done, truncated, info = super().step(action)
 
         success = (
-            self.sim.data.joint("yellow-box-joint").qpos[2] > 0.3
-            and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
+            self.sim.data.joint("box_joint").qpos[2] > 0.3 and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
         )
         diff_ee_cube = np.linalg.norm(
-            self.sim.data.joint("yellow-box-joint").qpos[:3]
-            - self.unwrapped.robot.get_cartesian_position().translation()
+            self.sim.data.joint("box_joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
         )
-        diff_cube_home = np.linalg.norm(self.sim.data.joint("yellow-box-joint").qpos[:3] - self.EE_HOME)
+        diff_cube_home = np.linalg.norm(self.sim.data.joint("box_joint").qpos[:3] - self.EE_HOME)
         reward = -diff_cube_home - diff_ee_cube
 
         return obs, reward, success, truncated, info
diff --git a/python/rcsss/envs/utils.py b/python/rcsss/envs/utils.py
index b4b6537f..19bba80f 100644
--- a/python/rcsss/envs/utils.py
+++ b/python/rcsss/envs/utils.py
@@ -63,9 +63,9 @@ def default_fr3_sim_gripper_cfg():
 
 def default_mujoco_cameraset_cfg():
     cameras = {
-        "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
+        "wrist": SimCameraConfig(identifier="wrist_0", type=int(CameraType.fixed)),
         "default_free": SimCameraConfig(identifier="", type=int(CameraType.default_free)),
-        # "bird_eye": SimCameraConfig(identifier="bird-eye-cam", type=int(CameraType.fixed)),
+        # "bird_eye": SimCameraConfig(identifier="bird_eye_cam", type=int(CameraType.fixed)),
     }
     # 256x256 needed for VLAs
     return SimCameraSetConfig(