From 95e48086f8eac07517741cc21c8e1a67fadb174b Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 16:49:53 -0400
Subject: [PATCH 01/12] add fuzzy to state-based

---
 pydfc/dfc.py                               | 84 +++++++++++++++++++++-
 pydfc/dfc_methods/cap.py                   | 12 +++-
 pydfc/dfc_methods/continuous_hmm.py        |  8 ++-
 pydfc/dfc_methods/discrete_hmm.py          |  5 ++
 pydfc/dfc_methods/sliding_window_clustr.py | 21 ++++++
 pydfc/dfc_methods/windowless.py            | 45 +++++++++++-
 6 files changed, 168 insertions(+), 7 deletions(-)

diff --git a/pydfc/dfc.py b/pydfc/dfc.py
index e02e662..d071c33 100644
--- a/pydfc/dfc.py
+++ b/pydfc/dfc.py
@@ -46,6 +46,9 @@ def __init__(self, measure=None):
         self.measure_ = measure
         self.FCSs_ = None  # is a dict
         self.FCS_idx_ = None  # is a dict
+        self.FCS_proba_ = (
+            None  # is a 2D numpy array of probabilities for each FCS at each time point
+        )
         # info of the time series used for dFC estimation
         self.TS_info_ = None
         self.TR_array_ = None
@@ -89,6 +92,14 @@ def FCSs(self):
     def FCS_idx(self):
         return self.FCS_idx_

+    @property
+    def FCS_proba(self):
+        """
+        FCS_proba is a 2D numpy array of probabilities for each FCS at each time point
+        shape = (n_time, n_states)
+        """
+        return self.FCS_proba_
+
     # test this
     @property
     def FCS_idx_array(self):
@@ -208,6 +219,54 @@ def get_dFC_mat(self, TRs=None, num_samples=None):
         else:
             return dFC_mat, TRs

+    def get_dFC_mat_fuzzy(self, TRs=None, num_samples=None):
+        """
+        ONLY FOR STATE-BASED METHODS
+
+        get dFC matrices corresponding to
+        the specified TRs based on fuzzy states
+
+        TRs should be list/ndarray not necessarily in order ?
+        if num_samples specified, it will downsample
+        TRs to reach that number of samples and will also
+        return picked TRs
+        if num_samples > len(TRs) -> picks all TRs
+        """
+
+        if not self.measure.is_state_based:
+            raise ValueError(
+                "This method is only applicable to state-based methods. "
+                "Please use get_dFC_mat() for state-free methods."
+            )
+
+        if TRs is None:
+            TRs = self.TR_array
+
+        if type(TRs) is np.int32 or type(TRs) is np.int64 or type(TRs) is int:
+            TRs = [TRs]
+
+        if not num_samples is None:
+            if num_samples < len(TRs):
+                TRs = TRs[
+                    np.linspace(0, len(TRs), num_samples, endpoint=False, dtype=int)
+                ]
+
+        dFC_mat = list()
+        for TR in TRs:
+            TR_index = np.where(self.TR_array == TR)[0]
+            FC_mat = np.zeros((self.n_regions, self.n_regions))
+            for i in range(self.FCS_proba.shape[1]):  # iterate over states
+                prob = self.FCS_proba[TR_index, i]
+                FC_mat += prob * self.FCSs[f"FCS{i + 1}"]
+            dFC_mat.append(FC_mat)
+
+        dFC_mat = np.array(dFC_mat)
+
+        if num_samples is None:
+            return dFC_mat
+        else:
+            return dFC_mat, TRs
+
     def SWed_dFC_mat(self, W=None, n_overlap=None, tapered_window=False):
         """
         the time samples will be picked after
@@ -230,10 +289,11 @@ def SWed_dFC_mat(self, W=None, n_overlap=None, tapered_window=False):

         return dFC_mat_new

-    def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None):
+    def set_dFC(self, FCSs, FCS_idx=None, FCS_proba=None, TS_info=None, TR_array=None):
         """
-        FCSs: a 3D numpy array of FC matrices with shape (n_time, n_regions, n_regions)
-        FCS_idx: a list of indices that correspond to each FC matrix in FCSs over time
+        FCSs: a 3D numpy array of FC matrices with shape (n_states, n_regions, n_regions), for state-free methods: (n_time, n_regions, n_regions)
+        FCS_idx: a list of indices that correspond to each FC matrix in FCSs over time, used for state-based methods.
+        FCS_proba: a 2D numpy array of probabilities for each FCS at each time point, shape = (n_time, n_states), used for state-based methods.
         """
         if len(FCSs.shape) == 2:
@@ -256,6 +316,17 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None):
                 self.n_time == -1
             ), "why n_time is not -1 ? Are you adding a dFC to an existing dFC ?"

+        if FCS_proba is not None and FCS_idx is not None:
+            assert FCS_proba.shape[0] == len(
+                FCS_idx
+            ), "FCS_proba shape does not match FCSs shape (n_time)."
+            assert (
+                FCS_proba.shape[1] == FCSs.shape[0]
+            ), "FCS_proba shape does not match FCSs shape (n_states)."
+            assert np.allclose(
+                FCS_proba.sum(axis=1), 1
+            ), "FCS_proba probabilities must sum to 1 for each time point."
+
         if TR_array is None:
             # self.n_time is -1 at first. if it is not -1, it means that a dFC is already set and
             # we are adding a new dFC to it.
@@ -268,6 +339,11 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None):

         assert np.sum(np.abs(np.sort(TR_array) - TR_array)) == 0.0, "TRs not sorted !"

+        assert len(TR_array) == FCS_proba.shape[0], (
+            "TR_array length does not match FCS_proba shape (n_time). "
+            f"TR_array length: {len(TR_array)}, FCS_proba shape: {FCS_proba.shape}"
+        )
+
         # the input FCS_idx is ranged from 0 to len(FCS)-1 but we shift it to 1 to len(FCS)
         self.FCSs_ = {}
         for i, FCS in enumerate(FCSs):
@@ -277,6 +353,8 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None):
             for i, idx in enumerate(FCS_idx):
                 self.FCS_idx_[f"TR{TR_array[i]}"] = f"FCS{idx + 1}"  # "FCS" + str(idx + 1)

+        self.FCS_proba_ = FCS_proba
+
         self.TS_info_ = TS_info
         self.n_regions_ = FCSs.shape[1]
         self.n_time_ = len(self.FCS_idx_)
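For reference, the fuzzy reconstruction that get_dFC_mat_fuzzy implements is a probability-weighted sum of the FCS matrices at each TR: dFC(t) = sum_k p_k(t) * FCS_k. A minimal numpy sketch of the same computation, with toy shapes that are not part of the patch:

    import numpy as np

    n_time, n_states, n_regions = 5, 3, 4
    FCS = np.random.rand(n_states, n_regions, n_regions)  # state FC patterns
    proba = np.random.rand(n_time, n_states)
    proba /= proba.sum(axis=1, keepdims=True)  # rows sum to 1, like FCS_proba

    # probability-weighted mixture over states, one FC matrix per time point
    dFC = np.einsum("ts,srq->trq", proba, FCS)
    assert dFC.shape == (n_time, n_regions, n_regions)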
""" if len(FCSs.shape) == 2: @@ -256,6 +316,17 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None): self.n_time == -1 ), "why n_time is not -1 ? Are you adding a dFC to an existing dFC ?" + if FCS_proba is not None and FCS_idx is not None: + assert FCS_proba.shape[0] == len( + FCS_idx + ), "FCS_proba shape does not match FCSs shape (n_time)." + assert ( + FCS_proba.shape[1] == FCSs.shape[0] + ), "FCS_proba shape does not match FCSs shape (n_states)." + assert np.allclose( + FCS_proba.sum(axis=1), 1 + ), "FCS_proba probabilities must sum to 1 for each time point." + if TR_array is None: # self.n_time is -1 at first. if it is not -1, it means that a dFC is already set and # we are adding a new dFC to it. @@ -268,6 +339,11 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None): assert np.sum(np.abs(np.sort(TR_array) - TR_array)) == 0.0, "TRs not sorted !" + assert len(TR_array) == FCS_proba.shape[0], ( + "TR_array length does not match FCS_proba shape (n_time). " + f"TR_array length: {len(TR_array)}, FCS_proba shape: {FCS_proba.shape}" + ) + # the input FCS_idx is ranged from 0 to len(FCS)-1 but we shift it to 1 to len(FCS) self.FCSs_ = {} for i, FCS in enumerate(FCSs): @@ -277,6 +353,8 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None): for i, idx in enumerate(FCS_idx): self.FCS_idx_[f"TR{TR_array[i]}"] = f"FCS{idx + 1}" # "FCS" + str(idx + 1) + self.FCS_proba_ = FCS_proba + self.TS_info_ = TS_info self.n_regions_ = FCSs.shape[1] self.n_time_ = len(self.FCS_idx_) diff --git a/pydfc/dfc_methods/cap.py b/pydfc/dfc_methods/cap.py index 0ee51bf..0938412 100644 --- a/pydfc/dfc_methods/cap.py +++ b/pydfc/dfc_methods/cap.py @@ -8,6 +8,7 @@ import time import numpy as np +from scipy.special import softmax from sklearn.cluster import KMeans from ..dfc import DFC @@ -151,12 +152,21 @@ def estimate_dFC(self, time_series): act_vecs = time_series.data.T Z = self.kmeans_.predict(act_vecs.astype(np.float32)) + # get distances from the cluster centers for each sample + distances = self.kmeans_.transform( + act_vecs.astype(np.float32) + ) # shape: (n_samples, n_clusters) = (n_time, n_states) + # Convert to prbability using softmax on negative distances + temperature = 1.0 # you can tune this + Z_proba = softmax(-distances / temperature, axis=1) # record time self.set_dFC_assess_time(time.time() - tic) dFC = DFC(measure=self) - dFC.set_dFC(FCSs=self.FCS_, FCS_idx=Z, TS_info=time_series.info_dict) + dFC.set_dFC( + FCSs=self.FCS_, FCS_idx=Z, FCS_proba=Z_proba, TS_info=time_series.info_dict + ) return dFC diff --git a/pydfc/dfc_methods/continuous_hmm.py b/pydfc/dfc_methods/continuous_hmm.py index f0a8fe8..7082d3d 100644 --- a/pydfc/dfc_methods/continuous_hmm.py +++ b/pydfc/dfc_methods/continuous_hmm.py @@ -122,12 +122,18 @@ def estimate_dFC(self, time_series): tic = time.time() Z = self.hmm_model.predict(time_series.data.T) + # get pribabilities for each state for each time point + Z_proba = self.hmm_model.predict_proba( + time_series.data.T + ) # shape: (n_samples, n_components) = (n_time, n_states) # record time self.set_dFC_assess_time(time.time() - tic) dFC = DFC(measure=self) - dFC.set_dFC(FCSs=self.FCS_, FCS_idx=Z, TS_info=time_series.info_dict) + dFC.set_dFC( + FCSs=self.FCS_, FCS_idx=Z, FCS_proba=Z_proba, TS_info=time_series.info_dict + ) return dFC diff --git a/pydfc/dfc_methods/discrete_hmm.py b/pydfc/dfc_methods/discrete_hmm.py index 77e1a91..03bde05 100644 --- a/pydfc/dfc_methods/discrete_hmm.py +++ b/pydfc/dfc_methods/discrete_hmm.py @@ -183,6 
diff --git a/pydfc/dfc_methods/discrete_hmm.py b/pydfc/dfc_methods/discrete_hmm.py
index 77e1a91..03bde05 100644
--- a/pydfc/dfc_methods/discrete_hmm.py
+++ b/pydfc/dfc_methods/discrete_hmm.py
@@ -183,6 +183,10 @@ def estimate_dFC(self, time_series):
         Obs_seq = FCC.FCS_idx_array.reshape(-1, 1)

         Z = self.hmm_model.predict(Obs_seq)
+        # get probabilities for each state for each time point
+        Z_proba = self.hmm_model.predict_proba(
+            Obs_seq
+        )  # shape: (n_samples, n_components) = (n_time, n_states)

         # record time
         self.set_dFC_assess_time(time.time() - tic)
@@ -191,6 +195,7 @@ def estimate_dFC(self, time_series):
         dFC.set_dFC(
             FCSs=self.FCS_,
             FCS_idx=Z,
+            FCS_proba=Z_proba,
             TS_info=time_series.info_dict,
             TR_array=FCC.TR_array,
         )

diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py
index 25d7386..2fc7194 100644
--- a/pydfc/dfc_methods/sliding_window_clustr.py
+++ b/pydfc/dfc_methods/sliding_window_clustr.py
@@ -8,6 +8,7 @@
 import time

 import numpy as np
+from scipy.special import softmax
 from sklearn.cluster import KMeans

 from ..dfc import DFC
@@ -239,9 +240,28 @@ def estimate_dFC(self, time_series):
         if self.params["clstr_distance"] == "manhattan":
             ########### Manhattan Clustering ##############
             Z = self.kmeans_.predict(F.astype(np.float32))
+            # get distances from the cluster centers for each sample
+            distances = self.kmeans_.transform(
+                F.astype(np.float32)
+            )  # shape: (n_samples, n_clusters)
+            # Convert to probability using softmax on negative distances
+            temperature = 1.0  # you can tune this
+            Z_proba = softmax(
+                -distances / temperature, axis=1
+            )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+
         else:
             ########### Euclidean Clustering ##############
             Z = self.kmeans_.predict(F.astype(np.float32))
+            # get distances from the cluster centers for each sample
+            distances = self.kmeans_.transform(
+                F.astype(np.float32)
+            )  # shape: (n_samples, n_clusters)
+            # Convert to probability using softmax on negative distances
+            temperature = 1.0  # you can tune this
+            Z_proba = softmax(
+                -distances / temperature, axis=1
+            )  # shape: (n_samples, n_clusters) = (n_time, n_states)

         # record time
         self.set_dFC_assess_time(time.time() - tic)
@@ -250,6 +270,7 @@ def estimate_dFC(self, time_series):
         dFC.set_dFC(
             FCSs=self.FCS_,
             FCS_idx=Z,
+            FCS_proba=Z_proba,
             TS_info=time_series.info_dict,
             TR_array=dFC_raw.TR_array,
         )

diff --git a/pydfc/dfc_methods/windowless.py b/pydfc/dfc_methods/windowless.py
index 65a5b15..2c4e722 100644
--- a/pydfc/dfc_methods/windowless.py
+++ b/pydfc/dfc_methods/windowless.py
@@ -9,6 +9,7 @@
 import numpy as np
 from ksvd import ApproximateKSVD
+from sklearn.linear_model import orthogonal_mp_gram

 from ..dfc import DFC
 from ..time_series import TIME_SERIES
@@ -110,6 +111,27 @@ def estimate_FCS(self, time_series):

         return self

+    def transform_proba(self, D, X, n_nonzero_coefs=None):
+        """
+        returns the probability of each state for each time point
+        D: dictionary, shape = (n_states, n_regions)
+        X: time series data, shape = (n_time, n_regions)
+        n_nonzero_coefs: number of non-zero coefficients to use in orthogonal matching pursuit
+        Returns:
+            Z_proba: shape = (n_time, n_states)
+        """
+        gram = D.dot(D.T)  # shape: (n_features, n_features) = (n_states, n_states)
+        Xy = D.dot(X.T)  # shape: (n_features, n_targets) = (n_states, n_time)
+
+        if n_nonzero_coefs is None:
+            n_nonzero_coefs = D.shape[0]
+
+        gamma = orthogonal_mp_gram(gram, Xy, n_nonzero_coefs=n_nonzero_coefs).T
+
+        Z_proba = np.abs(gamma) / np.abs(gamma).sum(axis=1, keepdims=True)
+
+        return Z_proba
+
     def estimate_dFC(self, time_series):

         assert (
@@ -125,17 +147,36 @@ def estimate_dFC(self, time_series):
         # start timing
         tic = time.time()

-        gamma = self.aksvd.transform(time_series.data.T)
+        gamma = self.aksvd.transform(time_series.data.T)  # shape: (n_time, n_states)
         Z = list()
         for i in range(time_series.n_time):
             Z.append(np.argwhere(gamma[i, :] != 0)[0, 0])

+        # get probability for each state for each time point
+        Z_proba = self.transform_proba(
+            D=self.dictionary,
+            X=time_series.data.T,
+            n_nonzero_coefs=self.params["n_states"],
+        )  # shape: (n_targets, n_features) = (n_time, n_states)
+
+        assert Z_proba.shape[0] == time_series.n_time, (
+            "Z_proba shape does not match time_series.n_time. "
+            f"Z_proba shape: {Z_proba.shape}, time_series.n_time: {time_series.n_time}"
+        )
+
+        assert Z_proba.shape[1] == self.params["n_states"], (
+            "Z_proba shape does not match n_states. "
+            f"Z_proba shape: {Z_proba.shape}, n_states: {self.params['n_states']}"
+        )
+
         # record time
         self.set_dFC_assess_time(time.time() - tic)

         dFC = DFC(measure=self)
-        dFC.set_dFC(FCSs=self.FCS_, FCS_idx=Z, TS_info=time_series.info_dict)
+        dFC.set_dFC(
+            FCSs=self.FCS_, FCS_idx=Z, FCS_proba=Z_proba, TS_info=time_series.info_dict
+        )

         return dFC
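transform_proba turns sparse-coding coefficients into pseudo-probabilities by L1-normalizing their magnitudes per time point. The same idea in isolation, with a random dictionary standing in for the learned KSVD one (illustrative only; with random data the row sums are effectively never zero, but a degenerate all-zero code would divide by zero):

    import numpy as np
    from sklearn.linear_model import orthogonal_mp_gram

    D = np.random.randn(3, 10)  # dictionary: (n_states, n_regions)
    X = np.random.randn(50, 10)  # data: (n_time, n_regions)

    gamma = orthogonal_mp_gram(D @ D.T, D @ X.T, n_nonzero_coefs=3).T
    Z_proba = np.abs(gamma) / np.abs(gamma).sum(axis=1, keepdims=True)
    assert Z_proba.shape == (50, 3)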
From 2db1ff8bbcce6e76081fbc5205770cb6dfc6f62a Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 17:10:10 -0400
Subject: [PATCH 02/12] minor

---
 pydfc/dfc_methods/sliding_window_clustr.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py
index 2fc7194..7ccfda5 100644
--- a/pydfc/dfc_methods/sliding_window_clustr.py
+++ b/pydfc/dfc_methods/sliding_window_clustr.py
@@ -83,6 +83,10 @@ def __init__(self, **params):
         self.params["measure_name"] = "Clustering"
         self.params["is_state_based"] = True

+        if self.params["clstr_distance"] is None:
+            # Default clustering distance is euclidean
+            self.params["clstr_distance"] = "euclidean"
+
         assert (
             self.params["clstr_distance"] == "euclidean"
             or self.params["clstr_distance"] == "manhattan"
From a31107d56b6907d4c5f05a343cd5d755788f31a3 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 17:21:38 -0400
Subject: [PATCH 03/12] minor adjustment

---
 pydfc/dfc.py | 77 ++++++++++++++++------------------------------------
 1 file changed, 24 insertions(+), 53 deletions(-)

diff --git a/pydfc/dfc.py b/pydfc/dfc.py
index d071c33..9885469 100644
--- a/pydfc/dfc.py
+++ b/pydfc/dfc.py
@@ -171,21 +171,22 @@ def state_act_dict(self, TRs=None):
         return state_act_dict

     # test
-    def dFC2dict(self, TRs=None):
+    def dFC2dict(self, TRs=None, fuzzy=False):
         # return dFC samples as a dictionary
         if TRs is None:
             TRs = self.TR_array
         if type(TRs) is list:
             TRs = np.array(TRs)
         TRs = TRs.astype(int)
-        dFC_mat = self.get_dFC_mat(TRs=TRs)
+
+        dFC_mat = self.get_dFC_mat(TRs=TRs, fuzzy=fuzzy)
+
         dFC_dict = {}
         for k, TR in enumerate(TRs):
             dFC_dict[f"TR{TR}"] = dFC_mat[k, :, :]
         return dFC_dict

-    # test this
-    def get_dFC_mat(self, TRs=None, num_samples=None):
+    def get_dFC_mat(self, TRs=None, num_samples=None, fuzzy=False):
         """
         get dFC matrices corresponding to
         the specified TRs
@@ -194,50 +195,16 @@ def get_dFC_mat(self, TRs=None, num_samples=None):
         TRs to reach that number of samples and will also
         return picked TRs
         if num_samples > len(TRs) -> picks all TRs
-        """
-
-        if TRs is None:
-            TRs = self.TR_array
-
-        if type(TRs) is np.int32 or type(TRs) is np.int64 or type(TRs) is int:
-            TRs = [TRs]
-
-        if not num_samples is None:
-            if num_samples < len(TRs):
-                TRs = TRs[
-                    np.linspace(0, len(TRs), num_samples, endpoint=False, dtype=int)
-                ]
-
-        dFC_mat = list()
-        for TR in TRs:
-            dFC_mat.append(self.FCSs[self.FCS_idx[f"TR{TR}"]])
-        dFC_mat = np.array(dFC_mat)
-
-        if num_samples is None:
-            return dFC_mat
-        else:
-            return dFC_mat, TRs
-
-    def get_dFC_mat_fuzzy(self, TRs=None, num_samples=None):
-        """
-        ONLY FOR STATE-BASED METHODS
-
-        get dFC matrices corresponding to
-        the specified TRs based on fuzzy states
-
-        TRs should be list/ndarray not necessarily in order ?
-        if num_samples specified, it will downsample
-        TRs to reach that number of samples and will also
-        return picked TRs
-        if num_samples > len(TRs) -> picks all TRs
+        ONLY FOR STATE-BASED METHODS:
+        if fuzzy is True, it will return dFC matrices based on fuzzy states
         """
-
-        if not self.measure.is_state_based:
-            raise ValueError(
-                "This method is only applicable to state-based methods. "
-                "Please use get_dFC_mat() for state-free methods."
-            )
+        if fuzzy:
+            if not self.measure.is_state_based:
+                raise ValueError(
+                    "This method is only applicable to state-based methods. "
+                    "Please use get_dFC_mat() for state-free methods."
+                )

         if TRs is None:
             TRs = self.TR_array
@@ -253,11 +220,14 @@ def get_dFC_mat_fuzzy(self, TRs=None, num_samples=None):

         dFC_mat = list()
         for TR in TRs:
-            TR_index = np.where(self.TR_array == TR)[0]
-            FC_mat = np.zeros((self.n_regions, self.n_regions))
-            for i in range(self.FCS_proba.shape[1]):  # iterate over states
-                prob = self.FCS_proba[TR_index, i]
-                FC_mat += prob * self.FCSs[f"FCS{i + 1}"]
+            if fuzzy:
+                TR_index = np.where(self.TR_array == TR)[0]
+                FC_mat = np.zeros((self.n_regions, self.n_regions))
+                for i in range(self.FCS_proba.shape[1]):  # iterate over states
+                    prob = self.FCS_proba[TR_index, i]
+                    FC_mat += prob * self.FCSs[f"FCS{i + 1}"]
+            else:
+                FC_mat = self.FCSs[self.FCS_idx[f"TR{TR}"]]
             dFC_mat.append(FC_mat)

         dFC_mat = np.array(dFC_mat)
@@ -363,6 +333,7 @@ def set_dFC(self, FCSs, FCS_idx=None, FCS_proba=None, TS_info=None, TR_array=Non
     def visualize_dFC(
         self,
         TRs=None,
+        fuzzy=False,
         normalize=False,
         show_networks=False,
         rank_norm=False,
@@ -386,11 +357,11 @@ def visualize_dFC(
             node_networks = None

         if rank_norm:
-            dFC_dict = rank_norm_dFC_dict(self.dFC2dict(TRs=TRs))
+            dFC_dict = rank_norm_dFC_dict(self.dFC2dict(TRs=TRs, fuzzy=fuzzy))
             cmap = "plasma"
             center_0 = False
         else:
-            dFC_dict = self.dFC2dict(TRs=TRs)
+            dFC_dict = self.dFC2dict(TRs=TRs, fuzzy=fuzzy)
             cmap = "seismic"
             center_0 = True
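The num_samples option in get_dFC_mat keeps evenly spaced TRs via np.linspace indexing; the selection logic on its own, with a toy TR array:

    import numpy as np

    TRs = np.arange(100, 200)  # available TRs
    num_samples = 8
    picked = TRs[np.linspace(0, len(TRs), num_samples, endpoint=False, dtype=int)]
    # 8 evenly spaced TRs: [100, 112, 125, 137, 150, 162, 175, 187]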
From 2e70d0e2cebede73b5844d7af79c99bc66b8669c Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 17:26:16 -0400
Subject: [PATCH 04/12] minor fix

---
 pydfc/dfc.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/pydfc/dfc.py b/pydfc/dfc.py
index 9885469..2410318 100644
--- a/pydfc/dfc.py
+++ b/pydfc/dfc.py
@@ -286,17 +286,6 @@ def set_dFC(self, FCSs, FCS_idx=None, FCS_proba=None, TS_info=None, TR_array=Non
                 self.n_time == -1
             ), "why n_time is not -1 ? Are you adding a dFC to an existing dFC ?"

-        if FCS_proba is not None and FCS_idx is not None:
-            assert FCS_proba.shape[0] == len(
-                FCS_idx
-            ), "FCS_proba shape does not match FCSs shape (n_time)."
-            assert (
-                FCS_proba.shape[1] == FCSs.shape[0]
-            ), "FCS_proba shape does not match FCSs shape (n_states)."
-            assert np.allclose(
-                FCS_proba.sum(axis=1), 1
-            ), "FCS_proba probabilities must sum to 1 for each time point."
-
         if TR_array is None:
             # self.n_time is -1 at first. if it is not -1, it means that a dFC is already set and
             # we are adding a new dFC to it.
@@ -309,10 +298,20 @@ def set_dFC(self, FCSs, FCS_idx=None, FCS_proba=None, TS_info=None, TR_array=Non

         assert np.sum(np.abs(np.sort(TR_array) - TR_array)) == 0.0, "TRs not sorted !"

-        assert len(TR_array) == FCS_proba.shape[0], (
-            "TR_array length does not match FCS_proba shape (n_time). "
-            f"TR_array length: {len(TR_array)}, FCS_proba shape: {FCS_proba.shape}"
-        )
+        if FCS_proba is not None and FCS_idx is not None:
+            assert FCS_proba.shape[0] == len(
+                FCS_idx
+            ), "FCS_proba shape does not match FCSs shape (n_time)."
+            assert (
+                FCS_proba.shape[1] == FCSs.shape[0]
+            ), "FCS_proba shape does not match FCSs shape (n_states)."
+            assert np.allclose(
+                FCS_proba.sum(axis=1), 1
+            ), "FCS_proba probabilities must sum to 1 for each time point."
+            assert len(TR_array) == FCS_proba.shape[0], (
+                "TR_array length does not match FCS_proba shape (n_time). "
+                f"TR_array length: {len(TR_array)}, FCS_proba shape: {FCS_proba.shape}"
+            )

         # the input FCS_idx is ranged from 0 to len(FCS)-1 but we shift it to 1 to len(FCS)
         self.FCSs_ = {}
From 7b678d14d01869704e9785cc0a73f066ee600537 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 18:11:37 -0400
Subject: [PATCH 05/12] change kmeans Z_proba

---
 pydfc/dfc_methods/cap.py                   | 10 +++++--
 pydfc/dfc_methods/sliding_window_clustr.py | 32 ++++++++++++++--------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/pydfc/dfc_methods/cap.py b/pydfc/dfc_methods/cap.py
index 0938412..a97c5d6 100644
--- a/pydfc/dfc_methods/cap.py
+++ b/pydfc/dfc_methods/cap.py
@@ -156,9 +156,13 @@ def estimate_dFC(self, time_series):
         distances = self.kmeans_.transform(
             act_vecs.astype(np.float32)
         )  # shape: (n_samples, n_clusters) = (n_time, n_states)
-        # Convert to probability using softmax on negative distances
-        temperature = 1.0  # you can tune this
-        Z_proba = softmax(-distances / temperature, axis=1)
+        # # Convert to probability using softmax on negative distances
+        # temperature = 1.0  # you can tune this
+        # Z_proba = softmax(-distances / temperature, axis=1)
+        rel = -distances
+        rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
+        rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
+        Z_proba = rel

         # record time
         self.set_dFC_assess_time(time.time() - tic)

diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py
index 7ccfda5..de0dea3 100644
--- a/pydfc/dfc_methods/sliding_window_clustr.py
+++ b/pydfc/dfc_methods/sliding_window_clustr.py
@@ -10,6 +10,7 @@
 import numpy as np
 from scipy.special import softmax
 from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler

 from ..dfc import DFC
 from ..dfc_utils import KMeansCustom, dFC_mat2vec, dFC_vec2mat
@@ -248,12 +249,17 @@ def estimate_dFC(self, time_series):
             distances = self.kmeans_.transform(
                 F.astype(np.float32)
             )  # shape: (n_samples, n_clusters)
-            # Convert to probability using softmax on negative distances
-            temperature = 1.0  # you can tune this
-            Z_proba = softmax(
-                -distances / temperature, axis=1
-            )  # shape: (n_samples, n_clusters) = (n_time, n_states)
-
+            # # scale distances
+            # scaled_distances = StandardScaler().fit_transform(distances)
+            # # Convert to probability using softmax on negative distances
+            # temperature = 1.0  # you can tune this
+            # Z_proba = softmax(
+            #     -distances / temperature, axis=1
+            # )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+            rel = -distances
+            rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
+            rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
+            Z_proba = rel
         else:
             ########### Euclidean Clustering ##############
             Z = self.kmeans_.predict(F.astype(np.float32))
@@ -261,11 +267,15 @@ def estimate_dFC(self, time_series):
             distances = self.kmeans_.transform(
                 F.astype(np.float32)
             )  # shape: (n_samples, n_clusters)
-            # Convert to probability using softmax on negative distances
-            temperature = 1.0  # you can tune this
-            Z_proba = softmax(
-                -distances / temperature, axis=1
-            )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+            # # Convert to probability using softmax on negative distances
+            # temperature = 1.0  # you can tune this
+            # Z_proba = softmax(
+            #     -distances / temperature, axis=1
+            # )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+            rel = -distances
+            rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
+            rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
+            Z_proba = rel

         # record time
         self.set_dFC_assess_time(time.time() - tic)
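The replacement maps each row of negative distances onto the simplex by shifting the row minimum to zero and dividing by the row sum, instead of taking a softmax. A side-by-side sketch with toy distances (not from the patch):

    import numpy as np
    from scipy.special import softmax

    distances = np.array([[1.0, 2.0, 4.0],
                          [0.5, 0.5, 3.0]])  # (n_time, n_states)

    # softmax on negative distances (the patch 01 behaviour)
    p_soft = softmax(-distances, axis=1)

    # min-shift normalization (this patch): the farthest cluster gets weight 0
    rel = -distances
    rel = rel - rel.min(axis=1, keepdims=True)
    p_rel = rel / rel.sum(axis=1, keepdims=True)

    assert np.allclose(p_rel.sum(axis=1), 1.0)

Unlike the softmax, this scheme has no temperature to tune and always assigns exactly zero weight to the most distant state; a row of identical distances would divide by zero, though, whereas the softmax would return a uniform row.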
From 871f0e1592a23caa44344dd785fbb264b815dea8 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Thu, 26 Jun 2025 20:37:23 -0400
Subject: [PATCH 06/12] change kmeans z_proba

---
 pydfc/dfc.py                               | 30 +++++-----------
 pydfc/dfc_methods/cap.py                   |  6 ++---
 pydfc/dfc_methods/sliding_window_clustr.py | 18 +++----------
 3 files changed, 12 insertions(+), 42 deletions(-)

diff --git a/pydfc/dfc.py b/pydfc/dfc.py
index 2410318..6cf595e 100644
--- a/pydfc/dfc.py
+++ b/pydfc/dfc.py
@@ -171,7 +171,7 @@ def state_act_dict(self, TRs=None):
         return state_act_dict

     # test
-    def dFC2dict(self, TRs=None, fuzzy=False):
+    def dFC2dict(self, TRs=None):
         # return dFC samples as a dictionary
         if TRs is None:
             TRs = self.TR_array
@@ -179,14 +179,12 @@ def dFC2dict(self, TRs=None, fuzzy=False):
         if type(TRs) is list:
             TRs = np.array(TRs)
         TRs = TRs.astype(int)
-
-        dFC_mat = self.get_dFC_mat(TRs=TRs, fuzzy=fuzzy)
-
+        dFC_mat = self.get_dFC_mat(TRs=TRs)
         dFC_dict = {}
         for k, TR in enumerate(TRs):
             dFC_dict[f"TR{TR}"] = dFC_mat[k, :, :]
         return dFC_dict

-    def get_dFC_mat(self, TRs=None, num_samples=None, fuzzy=False):
+    def get_dFC_mat(self, TRs=None, num_samples=None):
         """
         get dFC matrices corresponding to
         the specified TRs
@@ -195,16 +193,6 @@ def get_dFC_mat(self, TRs=None, num_samples=None, fuzzy=False):
         TRs to reach that number of samples and will also
         return picked TRs
         if num_samples > len(TRs) -> picks all TRs
-
-        ONLY FOR STATE-BASED METHODS:
-        if fuzzy is True, it will return dFC matrices based on fuzzy states
         """
-        if fuzzy:
-            if not self.measure.is_state_based:
-                raise ValueError(
-                    "This method is only applicable to state-based methods. "
-                    "Please use get_dFC_mat() for state-free methods."
-                )
-
         if TRs is None:
             TRs = self.TR_array
@@ -220,14 +208,7 @@ def get_dFC_mat(self, TRs=None, num_samples=None, fuzzy=False):

         dFC_mat = list()
         for TR in TRs:
-            if fuzzy:
-                TR_index = np.where(self.TR_array == TR)[0]
-                FC_mat = np.zeros((self.n_regions, self.n_regions))
-                for i in range(self.FCS_proba.shape[1]):  # iterate over states
-                    prob = self.FCS_proba[TR_index, i]
-                    FC_mat += prob * self.FCSs[f"FCS{i + 1}"]
-            else:
-                FC_mat = self.FCSs[self.FCS_idx[f"TR{TR}"]]
+            FC_mat = self.FCSs[self.FCS_idx[f"TR{TR}"]]
             dFC_mat.append(FC_mat)

         dFC_mat = np.array(dFC_mat)
@@ -332,7 +315,6 @@ def set_dFC(self, FCSs, FCS_idx=None, FCS_proba=None, TS_info=None, TR_array=Non
     def visualize_dFC(
         self,
         TRs=None,
-        fuzzy=False,
         normalize=False,
         show_networks=False,
         rank_norm=False,
@@ -356,11 +338,11 @@ def visualize_dFC(
             node_networks = None

         if rank_norm:
-            dFC_dict = rank_norm_dFC_dict(self.dFC2dict(TRs=TRs, fuzzy=fuzzy))
+            dFC_dict = rank_norm_dFC_dict(self.dFC2dict(TRs=TRs))
             cmap = "plasma"
             center_0 = False
         else:
-            dFC_dict = self.dFC2dict(TRs=TRs, fuzzy=fuzzy)
+            dFC_dict = self.dFC2dict(TRs=TRs)
             cmap = "seismic"
             center_0 = True

diff --git a/pydfc/dfc_methods/cap.py b/pydfc/dfc_methods/cap.py
index a97c5d6..ed0b21c 100644
--- a/pydfc/dfc_methods/cap.py
+++ b/pydfc/dfc_methods/cap.py
@@ -156,13 +156,11 @@ def estimate_dFC(self, time_series):
         distances = self.kmeans_.transform(
             act_vecs.astype(np.float32)
         )  # shape: (n_samples, n_clusters) = (n_time, n_states)
-        # # Convert to probability using softmax on negative distances
-        # temperature = 1.0  # you can tune this
-        # Z_proba = softmax(-distances / temperature, axis=1)
+        # normalize distances to semi-probabilities
         rel = -distances
         rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
         rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
-        Z_proba = rel
+        Z_proba = rel  # shape: (n_samples, n_clusters) = (n_time, n_states)

         # record time
         self.set_dFC_assess_time(time.time() - tic)

diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py
index de0dea3..6674087 100644
--- a/pydfc/dfc_methods/sliding_window_clustr.py
+++ b/pydfc/dfc_methods/sliding_window_clustr.py
@@ -249,17 +249,11 @@ def estimate_dFC(self, time_series):
             distances = self.kmeans_.transform(
                 F.astype(np.float32)
             )  # shape: (n_samples, n_clusters)
-            # # scale distances
-            # scaled_distances = StandardScaler().fit_transform(distances)
-            # # Convert to probability using softmax on negative distances
-            # temperature = 1.0  # you can tune this
-            # Z_proba = softmax(
-            #     -distances / temperature, axis=1
-            # )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+            # normalize distances to semi-probabilities
             rel = -distances
             rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
             rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
-            Z_proba = rel
+            Z_proba = rel  # shape: (n_samples, n_clusters) = (n_time, n_states)
         else:
             ########### Euclidean Clustering ##############
             Z = self.kmeans_.predict(F.astype(np.float32))
@@ -267,15 +261,11 @@ def estimate_dFC(self, time_series):
             distances = self.kmeans_.transform(
                 F.astype(np.float32)
             )  # shape: (n_samples, n_clusters)
-            # # Convert to probability using softmax on negative distances
-            # temperature = 1.0  # you can tune this
-            # Z_proba = softmax(
-            #     -distances / temperature, axis=1
-            # )  # shape: (n_samples, n_clusters) = (n_time, n_states)
+            # normalize distances to semi-probabilities
             rel = -distances
             rel = rel - rel.min(axis=1, keepdims=True)  # shift min to 0
            rel = rel / rel.sum(axis=1, keepdims=True)  # normalize
-            Z_proba = rel
+            Z_proba = rel  # shape: (n_samples, n_clusters) = (n_time, n_states)

         # record time
         self.set_dFC_assess_time(time.time() - tic)

From 926bed374d341f8e3b7d60d11d80334db18aef06 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Wed, 2 Jul 2025 16:06:30 -0400
Subject: [PATCH 07/12] add transform to KMeansCustom

---
 pydfc/dfc_utils.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pydfc/dfc_utils.py b/pydfc/dfc_utils.py
index e1a449c..da77e4b 100644
--- a/pydfc/dfc_utils.py
+++ b/pydfc/dfc_utils.py
@@ -509,6 +509,13 @@ def predict(self, X):
         X = deepcopy(X)
         return self._assign_clusters(X, self.cluster_centers_)

+    def transform(self, X):
+        """
+        Transform the data to cluster centers
+        """
+        X = deepcopy(X)
+        return pairwise_distances(X, self.cluster_centers_, metric=self.metric)
+

 ####################################################################################################

From bc612ea3da5bdf7c8d4c1775429c9d314a70eeb4 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Fri, 4 Jul 2025 14:19:58 -0400
Subject: [PATCH 08/12] fix nilearn dependency

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 704e014..3a662f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
     'ksvd',
     'matplotlib',
     'networkx',
-    'nilearn>=0.10.2,!=0.10.3',
+    'nilearn==0.10.2',
     'pycwt',
     'seaborn',
     'statsmodels'
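KMeansCustom.transform mirrors sklearn's KMeans.transform contract: one distance per (sample, cluster) pair, which is exactly what estimate_dFC's distance-to-probability step consumes. A quick check of that contract using the same sklearn primitive (toy data):

    import numpy as np
    from sklearn.metrics import pairwise_distances

    X = np.random.rand(6, 4)  # (n_samples, n_features)
    centers = np.random.rand(3, 4)  # (n_clusters, n_features)

    D = pairwise_distances(X, centers, metric="manhattan")
    assert D.shape == (6, 3)  # one distance per (sample, cluster)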
From 2b5f691053c205f988776589cdca8fc597cdfc0e Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Fri, 4 Jul 2025 19:35:08 -0400
Subject: [PATCH 09/12] make KMeansCustom faster

---
 pydfc/dfc_utils.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/pydfc/dfc_utils.py b/pydfc/dfc_utils.py
index da77e4b..6a351c3 100644
--- a/pydfc/dfc_utils.py
+++ b/pydfc/dfc_utils.py
@@ -460,11 +460,13 @@ def _custom_distance(self, p1, p2):
         return pairwise_distances([p1], [p2], metric=self.metric)[0][0]

     def _assign_clusters(self, X, centroids):
-        clusters = []
-        for x in X:
-            distances = [self._custom_distance(x, c) for c in centroids]
-            clusters.append(np.argmin(distances))
-        return clusters
+        if self.metric == "manhattan":
+            distances = np.abs(X[:, None, :] - centroids[None, :, :]).sum(axis=2)
+        elif self.metric == "euclidean":
+            distances = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
+        else:
+            distances = pairwise_distances(X, centroids, metric=self.metric)
+        return np.argmin(distances, axis=1)

     def _compute_centroids(self, X, labels):
         centroids = []
@@ -474,7 +476,6 @@ def _compute_centroids(self, X, labels):
         return np.array(centroids)

     def fit(self, X):
-        X = deepcopy(X)
         min_inertia = None
         best_centroids = None
         best_labels = None
@@ -489,12 +490,18 @@ def fit(self, X):
             if np.allclose(centroids, new_centroids, atol=1e-6):
                 break
             centroids = new_centroids
-            inertia = np.sum(
-                [
-                    self._custom_distance(x, centroids[label]) ** 2
-                    for x, label in zip(X, labels)
-                ]
-            )
+
+            if self.metric == "manhattan":
+                distances = np.abs(X - centroids[labels]).sum(axis=1)
+            elif self.metric == "euclidean":
+                distances = np.linalg.norm(X - centroids[labels], axis=1)
+            else:
+                distances = pairwise_distances(
+                    X, centroids[labels], metric=self.metric
+                ).diagonal()
+
+            inertia = np.sum(distances**2)
+
             if min_inertia is None or inertia < min_inertia:
                 min_inertia = inertia
                 best_centroids = centroids
@@ -506,14 +513,12 @@ def fit(self, X):
         return self

     def predict(self, X):
-        X = deepcopy(X)
         return self._assign_clusters(X, self.cluster_centers_)

     def transform(self, X):
         """
         Transform the data to cluster centers
         """
-        X = deepcopy(X)
         return pairwise_distances(X, self.cluster_centers_, metric=self.metric)
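The speed-up comes from replacing the per-sample Python loops with broadcasted array arithmetic; the core trick in isolation:

    import numpy as np

    X = np.random.rand(500, 8)  # (n_samples, n_features)
    C = np.random.rand(4, 8)  # (n_clusters, n_features)

    # (n_samples, 1, n_features) - (1, n_clusters, n_features)
    diff = X[:, None, :] - C[None, :, :]
    manhattan = np.abs(diff).sum(axis=2)  # (n_samples, n_clusters)
    euclidean = np.linalg.norm(diff, axis=2)  # (n_samples, n_clusters)
    labels = np.argmin(manhattan, axis=1)

For very large n_samples the (n_samples, n_clusters, n_features) intermediate can be memory-hungry, which is presumably why the generic-metric branch still falls back to pairwise_distances.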
From b599f3770259fc739532fc195dd30beebe11bd1d Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Wed, 9 Jul 2025 20:59:36 -0400
Subject: [PATCH 10/12] add more flexibility to data_loader nifti functions

---
 pydfc/data_loader.py | 276 ++++++++++++++++++++++++++++++++++++-------
 pydfc/time_series.py |   5 +
 2 files changed, 238 insertions(+), 43 deletions(-)

diff --git a/pydfc/data_loader.py b/pydfc/data_loader.py
index 303b8bd..98f9cdf 100644
--- a/pydfc/data_loader.py
+++ b/pydfc/data_loader.py
@@ -10,6 +10,10 @@
 import h5py
 import numpy as np
+from nilearn import datasets
+from nilearn.interfaces.fmriprep import load_confounds, load_confounds_strategy
+from nilearn.maskers import NiftiLabelsMasker, NiftiSpheresMasker
+from nilearn.plotting import find_parcellation_cut_coords

 from .dfc_utils import intersection, label2network
 from .time_series import TIME_SERIES
@@ -150,13 +154,18 @@ def load_from_array(subj_id2load=None, **params):
     return BOLD

-def nifti2array(nifti_file, confound_strategy="none", standardize=False, n_rois=100):
+def extract_region_signals(
+    nifti_file,
+    masker_type="NiftiLabelsMasker",
+    confound_strategy="none",
+    standardize=False,
+    labels_img=None,
+    seeds=None,
+    radius=None,
+):
     """
     this function uses nilearn maskers to extract
     BOLD signals from nifti files
-    For now it only works with schaefer atlas,
-    but you can set the number of rois to extract
-    {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000}

     returns a numpy array of shape (time, roi)
     and labels and locs of rois
@@ -167,37 +176,38 @@ def nifti2array(nifti_file, confound_strategy="none", standardize=False, n_rois=
         'no_motion_no_gsr': motion parameters are used
                             and global signal regression
                             is applied.
-    """
-    from nilearn import datasets
-    from nilearn.interfaces.fmriprep import load_confounds
-    from nilearn.maskers import NiftiLabelsMasker
-    from nilearn.plotting import find_parcellation_cut_coords
-
-    parc = datasets.fetch_atlas_schaefer_2018(n_rois=n_rois)
-    atlas_filename = parc.maps
-    labels = parc.labels
-    # The list of labels does not contain 'Background' by default.
-    # To have proper indexing, you should either manually add 'Background' to the list of labels:
-    # Prepend background label
-    labels = np.insert(labels, 0, "Background")
-
-    # extract locs
-    # test!
-    # check if order is the same as labels
-    locs, labels_ = find_parcellation_cut_coords(
-        atlas_filename, background_label=0, return_label_names=True
-    )
-
-    # create the masker for extracting time series
-    masker = NiftiLabelsMasker(
-        labels_img=atlas_filename,
-        labels=labels,
-        resampling_target="data",
-        standardize=standardize,
-    )
+        'simple': nilearn's simple preprocessing with
+                  full motion and basic wm_csf
+                  and high_pass

-    labels = np.delete(labels, 0)  # remove the background label
-    labels = [label.decode() for label in labels]
+    For now it only works with NiftiLabelsMasker and NiftiSpheresMasker and not with NiftiMapsMasker
+    masker_type: "NiftiLabelsMasker" or "NiftiSpheresMasker"
+    """
+    if masker_type == "NiftiSpheresMasker":
+        # check if seeds and radius are provided
+        if seeds is None or radius is None:
+            raise ValueError("For NiftiSpheresMasker, seeds and radius must be provided.")
+        # create the masker for extracting time series
+        masker = NiftiSpheresMasker(
+            seeds=seeds,
+            radius=radius,  # radius in mm
+            standardize=standardize,
+        )
+    elif masker_type == "NiftiLabelsMasker":
+        # check if labels_img is provided
+        if labels_img is None:
+            raise ValueError("For NiftiLabelsMasker, labels_img must be provided.")
+        # create the masker for extracting time series
+        masker = NiftiLabelsMasker(
+            labels_img=labels_img,
+            resampling_target="data",
+            standardize=standardize,
+        )
+    else:
+        raise ValueError(
+            "masker_type must be 'NiftiLabelsMasker' or 'NiftiSpheresMasker', "
+            f"but got {masker_type}"
+        )

     ### extract the timeseries
     if confound_strategy == "none":
@@ -223,16 +233,146 @@ def nifti2array(nifti_file, confound_strategy="none", standardize=False, n_rois=
         time_series = masker.fit_transform(
             nifti_file, confounds=confounds_simple, sample_mask=sample_mask
         )
+    elif confound_strategy == "simple":
+        confounds_simple, sample_mask = load_confounds_strategy(
+            nifti_file, denoise_strategy="simple"
+        )
+        time_series = masker.fit_transform(
+            nifti_file, confounds=confounds_simple, sample_mask=sample_mask
+        )
+    else:
+        raise ValueError(
+            "confound_strategy must be one of 'none', 'no_motion', 'no_motion_no_gsr', or 'simple', "
+            f"but got {confound_strategy}"
+        )
+
+    return time_series
+
+
+def nifti2array(
+    nifti_file,
+    masker_type="NiftiLabelsMasker",
+    confound_strategy="none",
+    standardize=False,
+    n_rois=100,
+    labels_img=None,
+    seeds=None,
+    radius=None,
+    region_names=None,
+):
+    """
+    this function uses nilearn maskers to extract
+    BOLD signals from nifti files
+
+    returns a numpy array of shape (time, roi)
+    and labels and locs of rois
+
+    confound_strategy:
+        'none': no confounds are used
+        'no_motion': motion parameters are used
+        'no_motion_no_gsr': motion parameters are used
+                            and global signal regression
+                            is applied.
+        'simple': nilearn's simple preprocessing with
+                  full motion and basic wm_csf
+                  and high_pass
+
+    For now it only works with NiftiLabelsMasker and NiftiSpheresMasker and not with NiftiMapsMasker
+    masker_type: "NiftiLabelsMasker" or "NiftiSpheresMasker"
+    if masker_type is "NiftiLabelsMasker",
+        labels_img must be provided or n_rois must be provided
+    if masker_type is "NiftiSpheresMasker",
+        seeds and radius must be provided
+
+    Note:
+        when not using Schaefer atlas, make sure
+        that the labels_img/seeds and region_names are in the same order.
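extract_region_signals now just dispatches on masker_type; the two nilearn masker constructions it wraps, side by side (toy arguments; the atlas path is hypothetical):

    from nilearn.maskers import NiftiLabelsMasker, NiftiSpheresMasker

    labels_masker = NiftiLabelsMasker(
        labels_img="atlas.nii.gz",  # hypothetical label image
        resampling_target="data",
        standardize=False,
    )
    spheres_masker = NiftiSpheresMasker(
        seeds=[(0, -52, 26)],  # one seed, MNI coordinates in mm
        radius=8.0,
        standardize=False,
    )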
+ """ + if masker_type == "NiftiLabelsMasker": + if labels_img is None: + # in this case, we will use the schaefer atlas + # we use n_rois to determine the number of rois + assert n_rois in [ + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + ], "n_rois must be one of {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000}" + # fetch the schaefer atlas + parc = datasets.fetch_atlas_schaefer_2018(n_rois=n_rois) + labels_img = parc.maps + labels = parc.labels + labels = [label.decode() for label in labels] + else: + assert ( + region_names is not None + ), "region_names must be provided if labels_img is provided" + assert type(region_names) is list, "region_names must be a list of strings" + + labels = region_names + + # extract locs from labels_img + # check if order is the same as labels + locs, labels_ = find_parcellation_cut_coords( + labels_img, background_label=0, return_label_names=True + ) # numpy.ndarray of shape (n_labels, 3) + + elif masker_type == "NiftiSpheresMasker": + + # make sure seeds is a list of tuples (x, y, z) + assert seeds is not None, "seeds must be provided for NiftiSpheresMasker" + assert radius is not None, "radius must be provided for NiftiSpheresMasker" + assert type(seeds) is list, "seeds must be a list of tuples (x, y, z)" + assert all( + isinstance(seed, tuple) and len(seed) == 3 for seed in seeds + ), "seeds must be a list of tuples (x, y, z) with 3 elements each" + + locs = np.array(seeds) # seeds should be a list of tuples (x, y, z) + + assert ( + region_names is not None + ), "region_names must be provided if seeds are provided" + assert type(region_names) is list, "region_names must be a list of strings" + + labels = region_names + + else: + raise ValueError( + "masker_type must be 'NiftiLabelsMasker' or 'NiftiSpheresMasker', " + f"but got {masker_type}" + ) + + # extract the timeseries + time_series = extract_region_signals( + nifti_file=nifti_file, + masker_type=masker_type, + confound_strategy=confound_strategy, + standardize=standardize, + labels_img=labels_img, + seeds=seeds, + radius=radius, + ) return time_series, labels, locs def nifti2timeseries( nifti_file, - n_rois, Fs, subj_id, confound_strategy="none", + masker_type="NiftiLabelsMasker", + n_rois=100, + labels_img=None, + seeds=None, + radius=None, + region_names=None, standardize=False, TS_name=None, session=None, @@ -242,15 +382,50 @@ def nifti2timeseries( it uses nilearn maskers to extract ROI signals from nifti files and returns a TIME_SERIES object - For now it only works with schaefer atlas, - but you can set the number of rois to extract - {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000} + Parameters + ---------- + nifti_file : str + path to the nifti file + Fs : float + sampling frequency of the data + subj_id : str + subject ID, must start with 'sub-' + confound_strategy : str, optional + strategy for confound regression, by default "none" + masker_type : str, optional + type of masker to use, by default "NiftiLabelsMasker" + n_rois : int, optional + number of regions of interest to extract, by default 100 + labels_img : str, optional + path to the labels image, by default None + seeds : list, optional + list of tuples (x, y, z) for NiftiSpheresMasker + by default None + radius : float, optional + radius in mm for NiftiSpheresMasker, by default None + region_names : list, optional + list of region names for NiftiLabelsMasker or NiftiSpheresMasker, + by default None + standardize : bool, optional + whether to standardize the time series, by default False + TS_name 
+    TS_name : str, optional
+        name of the time series, by default None
+    session : str, optional
+        session name, by default None
+
+    For more information on confound_strategy, masker_type, and other parameters,
+    see the documentation of the nifti2array function.
     """
     time_series, labels, locs = nifti2array(
         nifti_file=nifti_file,
         confound_strategy=confound_strategy,
         standardize=standardize,
+        masker_type=masker_type,
         n_rois=n_rois,
+        labels_img=labels_img,
+        seeds=seeds,
+        radius=radius,
+        region_names=region_names,
     )

     assert type(locs) is np.ndarray, "locs must be a numpy array"
@@ -280,8 +455,13 @@ def nifti2timeseries(
 def multi_nifti2timeseries(
     nifti_files_list,
     subj_id_list,
-    n_rois,
     Fs,
+    masker_type="NiftiLabelsMasker",
+    n_rois=100,
+    labels_img=None,
+    seeds=None,
+    radius=None,
+    region_names=None,
     confound_strategy="none",
     standardize=False,
     TS_name=None,
@@ -295,10 +475,15 @@ def multi_nifti2timeseries(
         if BOLD_multi is None:
             BOLD_multi = nifti2timeseries(
                 nifti_file=nifti_file,
-                n_rois=n_rois,
-                Fs=Fs,
                 subj_id=subj_id,
+                Fs=Fs,
                 confound_strategy=confound_strategy,
+                masker_type=masker_type,
+                n_rois=n_rois,
+                labels_img=labels_img,
+                seeds=seeds,
+                radius=radius,
+                region_names=region_names,
                 standardize=standardize,
                 TS_name=TS_name,
                 session=session,
@@ -307,10 +492,15 @@ def multi_nifti2timeseries(
             BOLD_multi.concat_ts(
                 nifti2timeseries(
                     nifti_file=nifti_file,
-                    n_rois=n_rois,
-                    Fs=Fs,
                     subj_id=subj_id,
+                    Fs=Fs,
                     confound_strategy=confound_strategy,
+                    masker_type=masker_type,
+                    n_rois=n_rois,
+                    labels_img=labels_img,
+                    seeds=seeds,
+                    radius=radius,
+                    region_names=region_names,
                     standardize=standardize,
                     TS_name=TS_name,
                     session=session,

diff --git a/pydfc/time_series.py b/pydfc/time_series.py
index 7eab737..bb323c6 100644
--- a/pydfc/time_series.py
+++ b/pydfc/time_series.py
@@ -83,6 +83,11 @@ def __init__(
         else:
             self.data_dict_[subj_id]["time_array"] = time_array

+        # make sure node_labels have same number of regions as self.n_regions_
+        assert (
+            len(node_labels) == self.n_regions_
+        ), "node_labels must have the same length as the number of regions in data."
+
         self.locs_ = locs
         self.node_labels_ = node_labels

From 071cf737fe71ce5c723fbb700355e34fdb95adb4 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Wed, 9 Jul 2025 21:53:11 -0400
Subject: [PATCH 11/12] fix test_data_loader

---
 tests/test_data_loader.py | 46 ++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index 4c1f3db..32d8461 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -4,30 +4,36 @@
 from pydfc.data_loader import nifti2timeseries

-# @pytest.fixture(scope="session")
-# def rest_file(tmp_path_factory):
-#     URL = "https://s3.amazonaws.com/openneuro.org/ds002785/derivatives/fmriprep/sub-0001/func/sub-0001_task-restingstate_acq-mb3_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz?versionId=UfCs4xtwIEPDgmb32qFbtMokl_jxLUKr"
-#     tmpdir = tmp_path_factory.mktemp("data")
-#     file_path = tmpdir / "sub-0001_task-restingstate_acq-mb3_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz"
-#     with httpx.stream("GET", URL) as response:
-#         with file_path.open("wb") as f:
-#             for chunk in response.iter_bytes():
-#                 f.write(chunk)
-#
-#     return file_path

 @pytest.fixture
-def simulated_bold_data(tmp_path):
-    img = nb.Nifti1Image(np.random.rand(10, 10, 10, 100), np.eye(4))
-    img.to_filename(tmp_path / "simulated_bold.nii.gz")
-    return tmp_path / "simulated_bold.nii.gz"
+def simulated_bold_and_label(tmp_path):
+    # Simulated BOLD data
+    bold_data = np.random.rand(10, 10, 10, 100)
+    affine = np.eye(4)
+    bold_img = nb.Nifti1Image(bold_data, affine)
+    bold_file = tmp_path / "bold.nii.gz"
+    bold_img.to_filename(bold_file)
+
+    # Simulated label image with 3 ROIs (labels 1, 2, 3)
+    labels = np.zeros((10, 10, 10), dtype=int)
+    labels[1:4, 1:4, 1:4] = 1
+    labels[5:7, 5:7, 5:7] = 2
+    labels[7:9, 1:3, 1:3] = 3
+    label_img = nb.Nifti1Image(labels, affine)
+    label_file = tmp_path / "labels.nii.gz"
+    label_img.to_filename(label_file)
+
+    return str(bold_file), str(label_file)


-def test_load(simulated_bold_data):
-    nifti2timeseries(
-        nifti_file=str(simulated_bold_data),
-        n_rois=100,
+def test_load(simulated_bold_and_label):
+    bold_file, label_file = simulated_bold_and_label
+    ts = nifti2timeseries(
+        nifti_file=bold_file,
+        labels_img=label_file,
+        region_names=["1", "2", "3"],
         Fs=1 / 0.75,
         subj_id="sub-0001",
     )
+    assert ts is not None
+    assert ts.n_regions == 3

From 888551bd1bafb9acb28737832bfcb86c22216ca2 Mon Sep 17 00:00:00 2001
From: mtorabi59
Date: Wed, 9 Jul 2025 21:56:06 -0400
Subject: [PATCH 12/12] minor

---
 tests/test_data_loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py
index 32d8461..7142336 100644
--- a/tests/test_data_loader.py
+++ b/tests/test_data_loader.py
@@ -15,7 +15,7 @@ def simulated_bold_and_label(tmp_path):
     bold_img.to_filename(bold_file)

     # Simulated label image with 3 ROIs (labels 1, 2, 3)
-    labels = np.zeros((10, 10, 10), dtype=int)
+    labels = np.zeros((10, 10, 10), dtype=np.int32)
     labels[1:4, 1:4, 1:4] = 1
     labels[5:7, 5:7, 5:7] = 2
     labels[7:9, 1:3, 1:3] = 3